author    | dim <dim@FreeBSD.org> | 2011-05-02 19:34:44 +0000
committer | dim <dim@FreeBSD.org> | 2011-05-02 19:34:44 +0000
commit    | 2b066988909948dc3d53d01760bc2d71d32f3feb (patch)
tree      | fc5f365fb9035b2d0c622bbf06c9bbe8627d7279 /lib
parent    | c80ac9d286b8fcc6d1ee5d76048134cf80aa9edc (diff)
Vendor import of llvm trunk r130700:
http://llvm.org/svn/llvm-project/llvm/trunk@130700
Diffstat (limited to 'lib')
541 files changed, 30343 insertions, 14884 deletions
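
Most of the churn in lib/Analysis/DIBuilder.cpp below is a mechanical migration from the old pointer-plus-length form MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)) to the ArrayRef-based overload MDNode::get(VMContext, Elts). A minimal sketch of the pattern, assuming the post-r130700 headers; buildExampleNode is an illustrative name, not something this import adds:

    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    #include "llvm/Constants.h"
    #include "llvm/Type.h"
    using namespace llvm;

    // Builds a small metadata node the way the updated DIBuilder does.
    static MDNode *buildExampleNode(LLVMContext &Ctx) {
      Value *Elts[] = {
        ConstantInt::get(Type::getInt32Ty(Ctx), 42),
        MDString::get(Ctx, "example")
      };
      // Before: MDNode::get(Ctx, &Elts[0], array_lengthof(Elts));
      // After: the fixed-size array converts implicitly to ArrayRef<Value*>,
      // so the element count can no longer go out of sync with the array.
      return MDNode::get(Ctx, Elts);
    }

The same ArrayRef change also reaches DIBuilder's public interface: getOrCreateArray and createComplexVariable now take ArrayRef<Value *> instead of a raw pointer and an element count.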
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index be02ddb..c189a00 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -86,14 +86,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   if (onlyAccessesArgPointees(MRB)) {
     bool doesAlias = false;
-    if (doesAccessArgPointees(MRB))
+    if (doesAccessArgPointees(MRB)) {
+      MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
-           AI != AE; ++AI)
-        if (!isNoAlias(Location(*AI), Loc)) {
+           AI != AE; ++AI) {
+        const Value *Arg = *AI;
+        if (!Arg->getType()->isPointerTy())
+          continue;
+        Location CSLoc(Arg, UnknownSize, CSTag);
+        if (!isNoAlias(CSLoc, Loc)) {
           doesAlias = true;
           break;
         }
-
+      }
+    }
     if (!doesAlias)
       return NoModRef;
   }
@@ -138,13 +144,19 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
   // CS2's arguments.
   if (onlyAccessesArgPointees(CS2B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
-    if (doesAccessArgPointees(CS2B))
+    if (doesAccessArgPointees(CS2B)) {
+      MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator
            I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
-        R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
+        const Value *Arg = *I;
+        if (!Arg->getType()->isPointerTy())
+          continue;
+        Location CS2Loc(Arg, UnknownSize, CS2Tag);
+        R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
         if (R == Mask)
           break;
       }
+    }
     return R;
   }
@@ -152,13 +164,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
   // any of the memory referenced by CS1's arguments. If not, return NoModRef.
   if (onlyAccessesArgPointees(CS1B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
-    if (doesAccessArgPointees(CS1B))
+    if (doesAccessArgPointees(CS1B)) {
+      MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator
-           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
-        if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
+           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+        const Value *Arg = *I;
+        if (!Arg->getType()->isPointerTy())
+          continue;
+        Location CS1Loc(Arg, UnknownSize, CS1Tag);
+        if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
           R = Mask;
           break;
         }
+      }
+    }
     if (R == NoModRef)
       return R;
   }
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 3a46976d..2ed6949 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -602,6 +602,10 @@ void AliasSetTracker::ASTCallbackVH::deleted() {
   // this now dangles!
 }
 
+void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) {
+  AST->copyValue(getValPtr(), V);
+}
+
 AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast)
   : CallbackVH(V), AST(ast) {}
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 1af1c35..6ebe100 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -43,14 +43,12 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeLazyValueInfoPass(Registry);
   initializeLibCallAliasAnalysisPass(Registry);
   initializeLintPass(Registry);
-  initializeLiveValuesPass(Registry);
   initializeLoopDependenceAnalysisPass(Registry);
   initializeLoopInfoPass(Registry);
   initializeMemDepPrinterPass(Registry);
   initializeMemoryDependenceAnalysisPass(Registry);
   initializeModuleDebugInfoPrinterPass(Registry);
   initializePostDominatorTreePass(Registry);
-  initializePostDominanceFrontierPass(Registry);
   initializeProfileEstimatorPassPass(Registry);
   initializeNoProfileInfoPass(Registry);
   initializeNoPathProfileInfoPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index f7bcd9e..f1bb8a3 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -350,7 +350,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       Scale *= IndexScale.getSExtValue();
 
-      // If we already had an occurrance of this index variable, merge this
+      // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
@@ -779,6 +779,26 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       return NoModRef;
     break;
   }
+  case Intrinsic::arm_neon_vld1: {
+    // LLVM's vld1 and vst1 intrinsics currently only support a single
+    // vector register.
+    uint64_t Size =
+      TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
+    if (isNoAlias(Location(II->getArgOperand(0), Size,
+                           II->getMetadata(LLVMContext::MD_tbaa)),
+                  Loc))
+      return NoModRef;
+    break;
+  }
+  case Intrinsic::arm_neon_vst1: {
+    uint64_t Size =
+      TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
+    if (isNoAlias(Location(II->getArgOperand(0), Size,
+                           II->getMetadata(LLVMContext::MD_tbaa)),
+                  Loc))
+      return NoModRef;
+    break;
+  }
   }
 
   // The AliasAnalysis base class has some smarts, lets use them.
@@ -883,7 +903,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
       return MustAlias;
 
-    // If there is a difference betwen the pointers, but the difference is
+    // If there is a difference between the pointers, but the difference is
     // less than the size of the associated memory object, then we know
     // that the objects are partially overlapping.
     if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 1a738fa..6be5617 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -9,11 +9,11 @@ add_llvm_library(LLVMAnalysis
   CFGPrinter.cpp
   CaptureTracking.cpp
   ConstantFolding.cpp
+  DIBuilder.cpp
   DbgInfoPrinter.cpp
   DebugInfo.cpp
-  DIBuilder.cpp
-  DominanceFrontier.cpp
   DomPrinter.cpp
+  DominanceFrontier.cpp
   IVUsers.cpp
   InlineCost.cpp
   InstCount.cpp
@@ -24,7 +24,6 @@ add_llvm_library(LLVMAnalysis
   LibCallAliasAnalysis.cpp
   LibCallSemantics.cpp
   Lint.cpp
-  LiveValues.cpp
   Loads.cpp
   LoopDependenceAnalysis.cpp
   LoopInfo.cpp
@@ -33,11 +32,11 @@ add_llvm_library(LLVMAnalysis
   MemoryBuiltins.cpp
   MemoryDependenceAnalysis.cpp
   ModuleDebugInfoPrinter.cpp
+  NoAliasAnalysis.cpp
+  PHITransAddr.cpp
   PathNumbering.cpp
   PathProfileInfo.cpp
   PathProfileVerifier.cpp
-  NoAliasAnalysis.cpp
-  PHITransAddr.cpp
   PostDominators.cpp
   ProfileEstimatorPass.cpp
   ProfileInfo.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 42a54d9..b2c27d1 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -17,6 +17,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Value.h"
 #include "llvm/Analysis/AliasAnalysis.h"
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index cd8d52c..5de2b04 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -23,6 +23,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/Operator.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/SmallVector.h"
@@ -1048,11 +1049,12 @@ llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::ctpop:
   case Intrinsic::ctlz:
   case Intrinsic::cttz:
-  case Intrinsic::uadd_with_overflow:
-  case Intrinsic::usub_with_overflow:
   case Intrinsic::sadd_with_overflow:
+  case Intrinsic::uadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
+  case Intrinsic::usub_with_overflow:
   case Intrinsic::smul_with_overflow:
+  case Intrinsic::umul_with_overflow:
   case Intrinsic::convert_from_fp16:
   case Intrinsic::convert_to_fp16:
   case Intrinsic::x86_sse_cvtss2si:
@@ -1362,7 +1364,8 @@ llvm::ConstantFoldCall(Function *F,
       case Intrinsic::uadd_with_overflow:
       case Intrinsic::ssub_with_overflow:
       case Intrinsic::usub_with_overflow:
-      case Intrinsic::smul_with_overflow: {
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::umul_with_overflow: {
         APInt Res;
         bool Overflow;
         switch (F->getIntrinsicID()) {
@@ -1382,6 +1385,9 @@ llvm::ConstantFoldCall(Function *F,
         case Intrinsic::smul_with_overflow:
           Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
           break;
+        case Intrinsic::umul_with_overflow:
+          Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
+          break;
         }
         Constant *Ops[] = {
           ConstantInt::get(F->getContext(), Res),
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index 590a9c1..dc98c9e 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -50,7 +50,7 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
     MDString::get(VMContext, Flags),
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
   };
-  TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
 }
 
 /// createFile - Create a file descriptor to hold debugging information
@@ -63,7 +63,7 @@ DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
     MDString::get(VMContext, Directory),
     TheCU
   };
-  return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIFile(MDNode::get(VMContext, Elts));
 }
 
 /// createEnumerator - Create a single enumerator value.
@@ -73,7 +73,7 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
     MDString::get(VMContext, Name),
     ConstantInt::get(Type::getInt64Ty(VMContext), Val)
   };
-  return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIEnumerator(MDNode::get(VMContext, Elts));
 }
 
 /// createBasicType - Create debugging information entry for a basic
@@ -95,7 +95,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags;
     ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createQaulifiedType - Create debugging information entry for a qualified
@@ -114,7 +114,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     FromTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createPointerType - Create debugging information entry for a pointer.
@@ -133,7 +133,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     PointeeTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createReferenceType - Create debugging information entry for a reference.
@@ -151,7 +151,7 @@ DIType DIBuilder::createReferenceType(DIType RTy) {
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     RTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createTypedef - Create debugging information entry for a typedef.
@@ -171,7 +171,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     Ty
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createFriend - Create debugging information entry for a 'friend'.
@@ -191,7 +191,7 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
     ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
     FriendTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createInheritance - Create debugging information entry to establish
@@ -211,7 +211,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     BaseTy
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createMemberType - Create debugging information entry for a member.
@@ -233,7 +233,36 @@ DIType DIBuilder::createMemberType(StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     Ty
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+                                 DIFile File, unsigned LineNumber,
+                                 uint64_t SizeInBits, uint64_t AlignInBits,
+                                 uint64_t OffsetInBits, unsigned Flags,
+                                 DIType Ty, StringRef PropertyName,
+                                 StringRef GetterName, StringRef SetterName,
+                                 unsigned PropertyAttributes) {
+  // TAG_member is encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_member),
+    File, // Or TheCU ? Ty ?
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    Ty,
+    MDString::get(VMContext, PropertyName),
+    MDString::get(VMContext, GetterName),
+    MDString::get(VMContext, SetterName),
+    ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
+  };
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createClassType - Create debugging information entry for a class.
@@ -260,7 +289,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
     VTableHoder,
     TemplateParams
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createTemplateTypeParameter - Create debugging information for template
@@ -278,8 +307,7 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
   };
-  return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0],
-                                             array_lengthof(Elts)));
+  return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
 }
 
 /// createTemplateValueParameter - Create debugging information for template
@@ -299,8 +327,7 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
  };
-  return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0],
-                                              array_lengthof(Elts)));
+  return DITemplateValueParameter(MDNode::get(VMContext, Elts));
 }
 
 /// createStructType - Create debugging information entry for a struct.
@@ -325,7 +352,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createUnionType - Create debugging information entry for an union.
@@ -350,7 +377,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createSubroutineType - Create subroutine type.
@@ -371,7 +398,7 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createEnumerationType - Create debugging information entry for an
@@ -396,7 +423,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
   NMD->addOperand(Node);
   return DIType(Node);
@@ -421,7 +448,7 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createVectorType - Create debugging information entry for a vector.
@@ -443,7 +470,7 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
   };
-  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// createArtificialType - Create a new DIType with "artificial" flag set.
@@ -467,7 +494,7 @@ DIType DIBuilder::createArtificialType(DIType Ty) {
   // Flags are stored at this slot.
   Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
 
-  return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
+  return DIType(MDNode::get(VMContext, Elts));
 }
 
 /// retainType - Retain DIType in a module even if it is not referenced
@@ -483,7 +510,7 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
   };
-  return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
+  return DIDescriptor(MDNode::get(VMContext, Elts));
 }
 
 /// createTemporaryType - Create a temporary forward-declared type.
@@ -491,7 +518,7 @@ DIType DIBuilder::createTemporaryType() {
   // Give the temporary MDNode a tag. It doesn't matter what tag we
   // use here as long as DIType accepts it.
   Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
   return DIType(Node);
 }
 
@@ -505,17 +532,17 @@ DIType DIBuilder::createTemporaryType(DIFile F) {
     NULL,
     F
   };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
   return DIType(Node);
 }
 
 /// getOrCreateArray - Get a DIArray, create one if required.
-DIArray DIBuilder::getOrCreateArray(Value *const *Elements, unsigned NumElements) {
-  if (NumElements == 0) {
+DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
+  if (Elements.empty()) {
     Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
-    return DIArray(MDNode::get(VMContext, &Null, 1));
+    return DIArray(MDNode::get(VMContext, Null));
   }
-  return DIArray(MDNode::get(VMContext, Elements, NumElements));
+  return DIArray(MDNode::get(VMContext, Elements));
 }
 
 /// getOrCreateSubrange - Create a descriptor for a value range. This
@@ -527,7 +554,7 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
     ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
   };
 
-  return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
+  return DISubrange(MDNode::get(VMContext, Elts));
 }
 
 /// createGlobalVariable - Create a new descriptor for the specified global.
@@ -548,7 +575,7 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
     ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
     Val
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
   // Create a named metadata so that we do not lose this mdnode.
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
   NMD->addOperand(Node);
@@ -575,7 +602,7 @@ createStaticVariable(DIDescriptor Context, StringRef Name,
     ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
     Val
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
   // Create a named metadata so that we do not lose this mdnode.
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
   NMD->addOperand(Node);
@@ -586,17 +613,18 @@ createStaticVariable(DIDescriptor Context, StringRef Name,
 DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
                                           StringRef Name, DIFile File,
                                           unsigned LineNo, DIType Ty,
-                                          bool AlwaysPreserve, unsigned Flags) {
+                                          bool AlwaysPreserve, unsigned Flags,
+                                          unsigned ArgNo) {
   Value *Elts[] = {
     GetTagConstant(VMContext, Tag),
     Scope,
     MDString::get(VMContext, Name),
     File,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
     Ty,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
   if (AlwaysPreserve) {
     // The optimizer may remove local variable. If there is an interest
    // to preserve variable info in such situation then stash it in a
@@ -619,18 +647,19 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
 DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
                                             StringRef Name, DIFile F,
                                             unsigned LineNo,
-                                            DIType Ty, Value *const *Addr,
-                                            unsigned NumAddr) {
+                                            DIType Ty, ArrayRef<Value *> Addr,
+                                            unsigned ArgNo) {
   SmallVector<Value *, 15> Elts;
   Elts.push_back(GetTagConstant(VMContext, Tag));
   Elts.push_back(Scope);
   Elts.push_back(MDString::get(VMContext, Name));
   Elts.push_back(F);
-  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
+  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))));
   Elts.push_back(Ty);
-  Elts.append(Addr, Addr+NumAddr);
+  Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  Elts.append(Addr.begin(), Addr.end());
 
-  return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
+  return DIVariable(MDNode::get(VMContext, Elts));
 }
 
 /// createFunction - Create a new descriptor for the specified function.
@@ -641,8 +670,9 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
                                        DIType Ty,
                                        bool isLocalToUnit, bool isDefinition,
                                        unsigned Flags, bool isOptimized,
-                                       Function *Fn) {
-
+                                       Function *Fn,
+                                       MDNode *TParams,
+                                       MDNode *Decl) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -660,9 +690,11 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
-    Fn
+    Fn,
+    TParams,
+    Decl
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
 
   // Create a named metadata so that we do not lose this mdnode.
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
@@ -682,7 +714,8 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
                                      MDNode *VTableHolder,
                                      unsigned Flags,
                                      bool isOptimized,
-                                     Function *Fn) {
+                                     Function *Fn,
+                                     MDNode *TParam) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -700,9 +733,10 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
     VTableHolder,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
-    Fn
+    Fn,
+    TParam,
   };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  MDNode *Node = MDNode::get(VMContext, Elts);
 
   // Create a named metadata so that we do not lose this mdnode.
   NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
@@ -721,7 +755,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
   };
-  return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DINameSpace(MDNode::get(VMContext, Elts));
 }
 
 DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
@@ -736,7 +770,7 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
     File,
     ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
   };
-  return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+  return DILexicalBlock(MDNode::get(VMContext, Elts));
 }
 
 /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
@@ -747,7 +781,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
-  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+  Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
 }
 
@@ -759,7 +793,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
 
-  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+  Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
 
   // If this block already has a terminator then insert this intrinsic
   // before the terminator.
@@ -778,7 +812,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
 
-  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+  Value *Args[] = { MDNode::get(V->getContext(), V),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     VarInfo };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
@@ -793,7 +827,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
 
-  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+  Value *Args[] = { MDNode::get(V->getContext(), V),
                     ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
                     VarInfo };
   return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 9db1456..67f8147 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -725,484 +725,6 @@ void DIVariable::dump() const {
   print(dbgs()); dbgs() << '\n';
 }
 
-//===----------------------------------------------------------------------===//
-// DIFactory: Basic Helpers
-//===----------------------------------------------------------------------===//
-
-DIFactory::DIFactory(Module &m)
-  : M(m), VMContext(M.getContext()), DeclareFn(0), ValueFn(0) {}
-
-Constant *DIFactory::GetTagConstant(unsigned TAG) {
-  assert((TAG & LLVMDebugVersionMask) == 0 &&
-         "Tag too large for debug encoding!");
-  return ConstantInt::get(Type::getInt32Ty(VMContext), TAG | LLVMDebugVersion);
-}
-
-//===----------------------------------------------------------------------===//
-// DIFactory: Primary Constructors
-//===----------------------------------------------------------------------===//
-
-/// GetOrCreateArray - Create an descriptor for an array of descriptors.
-/// This implicitly uniques the arrays created.
-DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
-  if (NumTys == 0) {
-    Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
-    return DIArray(MDNode::get(VMContext, &Null, 1));
-  }
-
-  SmallVector<Value *, 16> Elts(Tys, Tys+NumTys);
-  return DIArray(MDNode::get(VMContext, Elts.data(), Elts.size()));
-}
-
-/// GetOrCreateSubrange - Create a descriptor for a value range. This
-/// implicitly uniques the values returned.
-DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_subrange_type),
-    ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
-    ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
-  };
-
-  return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
-}
-
-/// CreateUnspecifiedParameter - Create unspeicified type descriptor
-/// for the subroutine type.
-DIDescriptor DIFactory::CreateUnspecifiedParameter() {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_unspecified_parameters)
-  };
-  return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
-}
-
-/// CreateCompileUnit - Create a new descriptor for the specified compile
-/// unit.  Note that this does not unique compile units within the module.
-DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
-                                           StringRef Filename,
-                                           StringRef Directory,
-                                           StringRef Producer,
-                                           bool isMain,
-                                           bool isOptimized,
-                                           StringRef Flags,
-                                           unsigned RunTimeVer) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_compile_unit),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    ConstantInt::get(Type::getInt32Ty(VMContext), LangID),
-    MDString::get(VMContext, Filename),
-    MDString::get(VMContext, Directory),
-    MDString::get(VMContext, Producer),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isMain),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
-    MDString::get(VMContext, Flags),
-    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
-  };
-
-  return DICompileUnit(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-/// CreateFile - Create a new descriptor for the specified file.
-DIFile DIFactory::CreateFile(StringRef Filename,
-                             StringRef Directory,
-                             DICompileUnit CU) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_file_type),
-    MDString::get(VMContext, Filename),
-    MDString::get(VMContext, Directory),
-    CU
-  };
-
-  return DIFile(MDNode::get(VMContext, &Elts[0], 4));
-}
-
-/// CreateEnumerator - Create a single enumerator value.
-DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_enumerator),
-    MDString::get(VMContext, Name),
-    ConstantInt::get(Type::getInt64Ty(VMContext), Val)
-  };
-  return DIEnumerator(MDNode::get(VMContext, &Elts[0], 3));
-}
-
-
-/// CreateBasicType - Create a basic type like int, float, etc.
-DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
-                                       StringRef Name,
-                                       DIFile F,
-                                       unsigned LineNumber,
-                                       uint64_t SizeInBits,
-                                       uint64_t AlignInBits,
-                                       uint64_t OffsetInBits, unsigned Flags,
-                                       unsigned Encoding) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_base_type),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
-  };
-  return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-
-/// CreateBasicType - Create a basic type like int, float, etc.
-DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
-                                         StringRef Name,
-                                         DIFile F,
-                                         unsigned LineNumber,
-                                         Constant *SizeInBits,
-                                         Constant *AlignInBits,
-                                         Constant *OffsetInBits, unsigned Flags,
-                                         unsigned Encoding) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_base_type),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    SizeInBits,
-    AlignInBits,
-    OffsetInBits,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
-  };
-  return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-/// CreateArtificialType - Create a new DIType with "artificial" flag set.
-DIType DIFactory::CreateArtificialType(DIType Ty) {
-  if (Ty.isArtificial())
-    return Ty;
-
-  SmallVector<Value *, 9> Elts;
-  MDNode *N = Ty;
-  assert (N && "Unexpected input DIType!");
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    if (Value *V = N->getOperand(i))
-      Elts.push_back(V);
-    else
-      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
-  }
-
-  unsigned CurFlags = Ty.getFlags();
-  CurFlags = CurFlags | DIType::FlagArtificial;
-
-  // Flags are stored at this slot.
-  Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
-
-  return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
-}
-
-/// CreateDerivedType - Create a derived type like const qualified type,
-/// pointer, typedef, etc.
-DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
-                                           DIDescriptor Context,
-                                           StringRef Name,
-                                           DIFile F,
-                                           unsigned LineNumber,
-                                           uint64_t SizeInBits,
-                                           uint64_t AlignInBits,
-                                           uint64_t OffsetInBits,
-                                           unsigned Flags,
-                                           DIType DerivedFrom) {
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom,
-  };
-  return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-
-/// CreateDerivedType - Create a derived type like const qualified type,
-/// pointer, typedef, etc.
-DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
-                                             DIDescriptor Context,
-                                             StringRef Name,
-                                             DIFile F,
-                                             unsigned LineNumber,
-                                             Constant *SizeInBits,
-                                             Constant *AlignInBits,
-                                             Constant *OffsetInBits,
-                                             unsigned Flags,
-                                             DIType DerivedFrom) {
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    SizeInBits,
-    AlignInBits,
-    OffsetInBits,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom,
-  };
-  return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
-}
-
-
-/// CreateCompositeType - Create a composite type like array, struct, etc.
-DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
-                                               DIDescriptor Context,
-                                               StringRef Name,
-                                               DIFile F,
-                                               unsigned LineNumber,
-                                               uint64_t SizeInBits,
-                                               uint64_t AlignInBits,
-                                               uint64_t OffsetInBits,
-                                               unsigned Flags,
-                                               DIType DerivedFrom,
-                                               DIArray Elements,
-                                               unsigned RuntimeLang,
-                                               MDNode *ContainingType) {
-
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
-    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom,
-    Elements,
-    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
-    ContainingType
-  };
-
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 13);
-  // Create a named metadata so that we do not lose this enum info.
-  if (Tag == dwarf::DW_TAG_enumeration_type) {
-    NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
-    NMD->addOperand(Node);
-  }
-  return DICompositeType(Node);
-}
-
-/// CreateTemporaryType - Create a temporary forward-declared type.
-DIType DIFactory::CreateTemporaryType() {
-  // Give the temporary MDNode a tag. It doesn't matter what tag we
-  // use here as long as DIType accepts it.
-  Value *Elts[] = {
-    GetTagConstant(DW_TAG_base_type)
-  };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
-  return DIType(Node);
-}
-
-/// CreateTemporaryType - Create a temporary forward-declared type.
-DIType DIFactory::CreateTemporaryType(DIFile F) {
-  // Give the temporary MDNode a tag. It doesn't matter what tag we
-  // use here as long as DIType accepts it.
-  Value *Elts[] = {
-    GetTagConstant(DW_TAG_base_type),
-    F.getCompileUnit(),
-    NULL,
-    F
-  };
-  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
-  return DIType(Node);
-}
-
-/// CreateCompositeType - Create a composite type like array, struct, etc.
-DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
-                                                 DIDescriptor Context,
-                                                 StringRef Name,
-                                                 DIFile F,
-                                                 unsigned LineNumber,
-                                                 Constant *SizeInBits,
-                                                 Constant *AlignInBits,
-                                                 Constant *OffsetInBits,
-                                                 unsigned Flags,
-                                                 DIType DerivedFrom,
-                                                 DIArray Elements,
-                                                 unsigned RuntimeLang,
-                                                 MDNode *ContainingType) {
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
-    SizeInBits,
-    AlignInBits,
-    OffsetInBits,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    DerivedFrom,
-    Elements,
-    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
-    ContainingType
-  };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 13);
-  // Create a named metadata so that we do not lose this enum info.
-  if (Tag == dwarf::DW_TAG_enumeration_type) {
-    NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
-    NMD->addOperand(Node);
-  }
-  return DICompositeType(Node);
-}
-
-
-/// CreateSubprogram - Create a new descriptor for the specified subprogram.
-/// See comments in DISubprogram for descriptions of these fields.  This
-/// method does not unique the generated descriptors.
-DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
-                                         StringRef Name,
-                                         StringRef DisplayName,
-                                         StringRef LinkageName,
-                                         DIFile F,
-                                         unsigned LineNo, DIType Ty,
-                                         bool isLocalToUnit,
-                                         bool isDefinition,
-                                         unsigned VK, unsigned VIndex,
-                                         DIType ContainingType,
-                                         unsigned Flags,
-                                         bool isOptimized,
-                                         Function *Fn) {
-
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_subprogram),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
-    MDString::get(VMContext, Name),
-    MDString::get(VMContext, DisplayName),
-    MDString::get(VMContext, LinkageName),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty,
-    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
-    ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
-    ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
-    ContainingType,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
-    Fn
-  };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 17);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
-  NMD->addOperand(Node);
-  return DISubprogram(Node);
-}
-
-/// CreateSubprogramDefinition - Create new subprogram descriptor for the
-/// given declaration.
-DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){
-  if (SPDeclaration.isDefinition())
-    return DISubprogram(SPDeclaration);
-
-  MDNode *DeclNode = SPDeclaration;
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_subprogram),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    DeclNode->getOperand(2), // Context
-    DeclNode->getOperand(3), // Name
-    DeclNode->getOperand(4), // DisplayName
-    DeclNode->getOperand(5), // LinkageName
-    DeclNode->getOperand(6), // CompileUnit
-    DeclNode->getOperand(7), // LineNo
-    DeclNode->getOperand(8), // Type
-    DeclNode->getOperand(9), // isLocalToUnit
-    ConstantInt::get(Type::getInt1Ty(VMContext), true),
-    DeclNode->getOperand(11), // Virtuality
-    DeclNode->getOperand(12), // VIndex
-    DeclNode->getOperand(13), // Containting Type
-    DeclNode->getOperand(14), // Flags
-    DeclNode->getOperand(15), // isOptimized
-    SPDeclaration.getFunction()
-  };
-  MDNode *Node =MDNode::get(VMContext, &Elts[0], 16);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
-  NMD->addOperand(Node);
-  return DISubprogram(Node);
-}
-
-/// CreateGlobalVariable - Create a new descriptor for the specified global.
-DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
-                                StringRef DisplayName,
-                                StringRef LinkageName,
-                                DIFile F,
-                                unsigned LineNo, DIType Ty,bool isLocalToUnit,
-                                bool isDefinition, llvm::GlobalVariable *Val) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_variable),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
-    MDString::get(VMContext, Name),
-    MDString::get(VMContext, DisplayName),
-    MDString::get(VMContext, LinkageName),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty,
-    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
-    Val
-  };
-
-  Value *const *Vs = &Elts[0];
-  MDNode *Node = MDNode::get(VMContext,Vs, 12);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
-  NMD->addOperand(Node);
-
-  return DIGlobalVariable(Node);
-}
-
-/// CreateGlobalVariable - Create a new descriptor for the specified constant.
-DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
-                                StringRef DisplayName,
-                                StringRef LinkageName,
-                                DIFile F,
-                                unsigned LineNo, DIType Ty,bool isLocalToUnit,
-                                bool isDefinition, llvm::Constant *Val) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_variable),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    Context,
-    MDString::get(VMContext, Name),
-    MDString::get(VMContext, DisplayName),
-    MDString::get(VMContext, LinkageName),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty,
-    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
-    Val
-  };
-
-  Value *const *Vs = &Elts[0];
-  MDNode *Node = MDNode::get(VMContext,Vs, 12);
-
-  // Create a named metadata so that we do not lose this mdnode.
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
-  NMD->addOperand(Node);
-
-  return DIGlobalVariable(Node);
-}
-
 /// fixupObjcLikeName - Replace contains special characters used
 /// in a typical Objective-C names with '.' in a given string.
 static void fixupObjcLikeName(std::string &Str) {
@@ -1214,19 +736,6 @@ static void fixupObjcLikeName(std::string &Str) {
   }
 }
 
-/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
-/// to hold function specific information.
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
-  SmallString<32> Out;
-  if (FuncName.find('[') == StringRef::npos)
-    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
-                                      .toStringRef(Out));
-  std::string Name = FuncName;
-  fixupObjcLikeName(Name);
-  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
-                                    .toStringRef(Out));
-}
-
 /// getFnSpecificMDNode - Return a NameMDNode, if available, that is
 /// suitable to hold function specific information.
 NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
@@ -1237,178 +746,18 @@ NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
   return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
 }
 
-/// CreateVariable - Create a new descriptor for the specified variable.
-DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
-                                     StringRef Name,
-                                     DIFile F,
-                                     unsigned LineNo,
-                                     DIType Ty, bool AlwaysPreserve,
-                                     unsigned Flags) {
-  Value *Elts[] = {
-    GetTagConstant(Tag),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    Ty,
-    ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
-  };
-  MDNode *Node = MDNode::get(VMContext, &Elts[0], 7);
-  if (AlwaysPreserve) {
-    // The optimizer may remove local variable. If there is an interest
-    // to preserve variable info in such situation then stash it in a
-    // named mdnode.
-    DISubprogram Fn(getDISubprogram(Context));
-    StringRef FName = "fn";
-    if (Fn.getFunction())
-      FName = Fn.getFunction()->getName();
-    char One = '\1';
-    if (FName.startswith(StringRef(&One, 1)))
-      FName = FName.substr(1);
-
-
-    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
-    FnLocals->addOperand(Node);
-  }
-  return DIVariable(Node);
-}
-
-
-/// CreateComplexVariable - Create a new descriptor for the specified variable
-/// which has a complex address expression for its address.
-DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
-                                            StringRef Name, DIFile F,
-                                            unsigned LineNo,
-                                            DIType Ty, Value *const *Addr,
-                                            unsigned NumAddr) {
-  SmallVector<Value *, 15> Elts;
-  Elts.push_back(GetTagConstant(Tag));
-  Elts.push_back(Context);
-  Elts.push_back(MDString::get(VMContext, Name));
-  Elts.push_back(F);
-  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
-  Elts.push_back(Ty);
-  Elts.append(Addr, Addr+NumAddr);
-
-  return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
-}
-
-
-/// CreateBlock - This creates a descriptor for a lexical block with the
-/// specified parent VMContext.
-DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
-                                             DIFile F, unsigned LineNo,
-                                             unsigned Col) {
-  // Defeat MDNode uniqing for lexical blocks.
-  static unsigned int unique_id = 0;
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_lexical_block),
-    Context,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    ConstantInt::get(Type::getInt32Ty(VMContext), Col),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
-  };
-  return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 6));
-}
-
-/// CreateNameSpace - This creates new descriptor for a namespace
-/// with the specified parent context.
-DINameSpace DIFactory::CreateNameSpace(DIDescriptor Context, StringRef Name,
-                                       DIFile F,
-                                       unsigned LineNo) {
-  Value *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_namespace),
-    Context,
-    MDString::get(VMContext, Name),
-    F,
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
-  };
-  return DINameSpace(MDNode::get(VMContext, &Elts[0], 5));
-}
-
-/// CreateLocation - Creates a debug info location.
-DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
-                                     DIScope S, DILocation OrigLoc) {
-  Value *Elts[] = {
-    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
-    ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
-    S,
-    OrigLoc,
-  };
-  return DILocation(MDNode::get(VMContext, &Elts[0], 4));
-}
-
-//===----------------------------------------------------------------------===//
-// DIFactory: Routines for inserting code into a function
-//===----------------------------------------------------------------------===//
-
-/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
-                                      Instruction *InsertBefore) {
-  assert(Storage && "no storage passed to dbg.declare");
-  assert(D.Verify() && "empty DIVariable passed to dbg.declare");
-  if (!DeclareFn)
-    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
-
-  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
-                    D };
-  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
-}
-
-/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
-                                      BasicBlock *InsertAtEnd) {
-  assert(Storage && "no storage passed to dbg.declare");
-  assert(D.Verify() && "invalid DIVariable passed to dbg.declare");
-  if (!DeclareFn)
-    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
-
-  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
-                    D };
-
-  // If this block already has a terminator then insert this intrinsic
-  // before the terminator.
-  if (TerminatorInst *T = InsertAtEnd->getTerminator())
-    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
-  else
-    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);}
-
-/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
-                                                DIVariable D,
-                                                Instruction *InsertBefore) {
-  assert(V && "no value passed to dbg.value");
-  assert(D.Verify() && "invalid DIVariable passed to dbg.value");
-  if (!ValueFn)
-    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
-
-  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
-                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
-                    D };
-  return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
-}
-
-/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
-Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
-                                                DIVariable D,
-                                                BasicBlock *InsertAtEnd) {
-  assert(V && "no value passed to dbg.value");
-  assert(D.Verify() && "invalid DIVariable passed to dbg.value");
-  if (!ValueFn)
-    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
-
-  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
-                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
-                    D };
-  return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
-}
-
-// RecordType - Record DIType in a module such that it is not lost even if
-// it is not referenced through debug info anchors.
-void DIFactory::RecordType(DIType T) {
-  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
-  NMD->addOperand(T);
+/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+  SmallString<32> Out;
+  if (FuncName.find('[') == StringRef::npos)
+    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
+                                      .toStringRef(Out));
+
+  std::string Name = FuncName;
+  fixupObjcLikeName(Name);
+  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
+                                    .toStringRef(Out));
 }
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 116aaf4..b226d66 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -602,7 +602,7 @@ void GlobalsModRef::addEscapingUse(Use &U) {
   // For the purposes of this analysis, it is conservatively correct to treat
   // a newly escaping value equivalently to a deleted one.  We could perhaps
   // be more precise by processing the new use and attempting to update our
-  // saved analysis results to accomodate it.
+  // saved analysis results to accommodate it.
   deleteValue(U);
 
   AliasAnalysis::addEscapingUse(U);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index c838218..2cda791 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
@@ -83,7 +84,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
     return false;   // Void and FP expressions cannot be reduced.
 
   // LSR is not APInt clean, do not touch integers bigger than 64-bits.
-  if (SE->getTypeSizeInBits(I->getType()) > 64)
+  // Also avoid creating IVs of non-native types. For example, we don't want a
+  // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
+  uint64_t Width = SE->getTypeSizeInBits(I->getType());
+  if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
     return false;
 
   if (!Processed.insert(I))
@@ -167,6 +171,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   LI = &getAnalysis<LoopInfo>();
   DT = &getAnalysis<DominatorTree>();
   SE = &getAnalysis<ScalarEvolution>();
+  TD = getAnalysisIfAvailable<TargetData>();
 
   // Find all uses of induction variables in this loop, and categorize
   // them by stride.  Start by finding all of the PHI nodes in the header for
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 47f91cf..a820ecf 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -501,7 +501,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
     return InlineCost::getAlways();
 
   if (CalleeFI->Metrics.usesDynamicAlloca) {
-    // Get infomation about the caller.
+    // Get information about the caller.
     FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
 
     // If we haven't calculated this information yet, do so now.
@@ -549,7 +549,7 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
 
   int Cost = 0;
 
-  // Look at the orginal size of the callee.  Each instruction counts as 5.
+  // Look at the original size of the callee.  Each instruction counts as 5.
   Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
 
   // Offset that with the amount of code that can be constant-folded
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 982dacb..9d6d339 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -18,11 +18,13 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "instsimplify"
+#include "llvm/Operator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Target/TargetData.h"
@@ -899,6 +901,111 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
   return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
 }
 
+/// SimplifyRem - Given operands for an SRem or URem, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+                          const TargetData *TD, const DominatorTree *DT,
+                          unsigned MaxRecurse) {
+  if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+    if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { C0, C1 };
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+    }
+  }
+
+  bool isSigned = Opcode == Instruction::SRem;
+
+  // X % undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // undef % X -> 0
+  if (match(Op0, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // 0 % X -> 0, we don't need to preserve faults!
+  if (match(Op0, m_Zero()))
+    return Op0;
+
+  // X % 0 -> undef, we don't need to preserve faults!
+  if (match(Op1, m_Zero()))
+    return UndefValue::get(Op0->getType());
+
+  // X % 1 -> 0
+  if (match(Op1, m_One()))
+    return Constant::getNullValue(Op0->getType());
+
+  if (Op0->getType()->isIntegerTy(1))
+    // It can't be remainder by zero, hence it must be remainder by one.
+    return Constant::getNullValue(Op0->getType());
+
+  // X % X -> 0
+  if (Op0 == Op1)
+    return Constant::getNullValue(Op0->getType());
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+/// SimplifySRemInst - Given operands for an SRem, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyURemInst - Given operands for a URem, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
+                               const DominatorTree *, unsigned) {
+  // undef % X -> undef    (the undef could be a snan).
+  if (match(Op0, m_Undef()))
+    return Op0;
+
+  // X % undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  return 0;
+}
+
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
 /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
 /// fold the result.  If not, this returns null.
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, @@ -1343,7 +1450,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // the compare, and if only one of them is then we moved it to RHS already. if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) || isa<ConstantPointerNull>(RHS))) - // We already know that LHS != LHS. + // We already know that LHS != RHS. return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); // If we are comparing with zero then try hard since this is a common case. @@ -1399,40 +1506,66 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // See if we are doing a comparison with a constant integer. if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { - switch (Pred) { - default: break; - case ICmpInst::ICMP_UGT: - if (CI->isMaxValue(false)) // A >u MAX -> FALSE - return ConstantInt::getFalse(CI->getContext()); - break; - case ICmpInst::ICMP_UGE: - if (CI->isMinValue(false)) // A >=u MIN -> TRUE - return ConstantInt::getTrue(CI->getContext()); - break; - case ICmpInst::ICMP_ULT: - if (CI->isMinValue(false)) // A <u MIN -> FALSE - return ConstantInt::getFalse(CI->getContext()); - break; - case ICmpInst::ICMP_ULE: - if (CI->isMaxValue(false)) // A <=u MAX -> TRUE - return ConstantInt::getTrue(CI->getContext()); - break; - case ICmpInst::ICMP_SGT: - if (CI->isMaxValue(true)) // A >s MAX -> FALSE - return ConstantInt::getFalse(CI->getContext()); - break; - case ICmpInst::ICMP_SGE: - if (CI->isMinValue(true)) // A >=s MIN -> TRUE - return ConstantInt::getTrue(CI->getContext()); - break; - case ICmpInst::ICMP_SLT: - if (CI->isMinValue(true)) // A <s MIN -> FALSE - return ConstantInt::getFalse(CI->getContext()); - break; - case ICmpInst::ICMP_SLE: - if (CI->isMaxValue(true)) // A <=s MAX -> TRUE - return ConstantInt::getTrue(CI->getContext()); - break; + // Rule out tautological comparisons (eg., ult 0 or uge 0). + ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue()); + if (RHS_CR.isEmptySet()) + return ConstantInt::getFalse(CI->getContext()); + if (RHS_CR.isFullSet()) + return ConstantInt::getTrue(CI->getContext()); + + // Many binary operators with constant RHS have easy to compute constant + // range. Use them to check whether the comparison is a tautology. + uint32_t Width = CI->getBitWidth(); + APInt Lower = APInt(Width, 0); + APInt Upper = APInt(Width, 0); + ConstantInt *CI2; + if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) { + // 'urem x, CI2' produces [0, CI2). + Upper = CI2->getValue(); + } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) { + // 'srem x, CI2' produces (-|CI2|, |CI2|). + Upper = CI2->getValue().abs(); + Lower = (-Upper) + 1; + } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { + // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (!CI2->isZero()) + Upper = NegOne.udiv(CI2->getValue()) + 1; + } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { + // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]. + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + APInt Val = CI2->getValue().abs(); + if (!Val.isMinValue()) { + Lower = IntMin.sdiv(Val); + Upper = IntMax.sdiv(Val) + 1; + } + } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) { + // 'lshr x, CI2' produces [0, UINT_MAX >> CI2]. 
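// Worked instance of the lshr range (illustrative numbers, not from the
// imported source): at Width == 8 with CI2 == 3, UINT_MAX >> 3 is
// 0xFF >> 3 == 31, so Upper becomes 32 and LHS_CR is the half-open range
// [0, 32); the ult(Width) guard below skips oversized shift amounts, whose
// result is undefined.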
+      APInt NegOne = APInt::getAllOnesValue(Width);
+      if (CI2->getValue().ult(Width))
+        Upper = NegOne.lshr(CI2->getValue()) + 1;
+    } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
+      // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
+      APInt IntMin = APInt::getSignedMinValue(Width);
+      APInt IntMax = APInt::getSignedMaxValue(Width);
+      if (CI2->getValue().ult(Width)) {
+        Lower = IntMin.ashr(CI2->getValue());
+        Upper = IntMax.ashr(CI2->getValue()) + 1;
+      }
+    } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
+      // 'or x, CI2' produces [CI2, UINT_MAX].
+      Lower = CI2->getValue();
+    } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
+      // 'and x, CI2' produces [0, CI2].
+      Upper = CI2->getValue() + 1;
+    }
+    if (Lower != Upper) {
+      ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+      if (RHS_CR.contains(LHS_CR))
+        return ConstantInt::getTrue(RHS->getContext());
+      if (RHS_CR.inverse().contains(LHS_CR))
+        return ConstantInt::getFalse(RHS->getContext());
     }
   }
@@ -1644,6 +1777,93 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     }
   }
+  if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
+    bool KnownNonNegative, KnownNegative;
+    switch (Pred) {
+    default:
+      break;
+    case ICmpInst::ICMP_SGT:
+    case ICmpInst::ICMP_SGE:
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_EQ:
+    case ICmpInst::ICMP_UGT:
+    case ICmpInst::ICMP_UGE:
+      return ConstantInt::getFalse(RHS->getContext());
+    case ICmpInst::ICMP_SLT:
+    case ICmpInst::ICMP_SLE:
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_NE:
+    case ICmpInst::ICMP_ULT:
+    case ICmpInst::ICMP_ULE:
+      return ConstantInt::getTrue(RHS->getContext());
+    }
+  }
+  if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) {
+    bool KnownNonNegative, KnownNegative;
+    switch (Pred) {
+    default:
+      break;
+    case ICmpInst::ICMP_SGT:
+    case ICmpInst::ICMP_SGE:
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_NE:
+    case ICmpInst::ICMP_UGT:
+    case ICmpInst::ICMP_UGE:
+      return ConstantInt::getTrue(RHS->getContext());
+    case ICmpInst::ICMP_SLT:
+    case ICmpInst::ICMP_SLE:
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_EQ:
+    case ICmpInst::ICMP_ULT:
+    case ICmpInst::ICMP_ULE:
+      return ConstantInt::getFalse(RHS->getContext());
+    }
+  }
+
+  if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+      LBO->getOperand(1) == RBO->getOperand(1)) {
+    switch (LBO->getOpcode()) {
+    default: break;
+    case Instruction::UDiv:
+    case Instruction::LShr:
+      if (ICmpInst::isSigned(Pred))
+        break;
+      // fall-through
+    case Instruction::SDiv:
+    case Instruction::AShr:
+      if (!LBO->isExact() && !RBO->isExact())
+        break;
+      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+                                      RBO->getOperand(0), TD, DT, MaxRecurse-1))
+        return V;
+      break;
+    case Instruction::Shl: {
+      bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
+      bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+      if (!NUW && !NSW)
+        break;
+      if (!NSW && ICmpInst::isSigned(Pred))
+        break;
+      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
                                      RBO->getOperand(0), TD, DT, MaxRecurse-1))
+        return V;
+      break;
+    }
+    }
+  }
+
   // If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) @@ -1879,6 +2099,9 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, TD, DT, MaxRecurse); @@ -1973,6 +2196,15 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); break; + case Instruction::SRem: + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::URem: + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::FRem: + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 9e7da6c..d5f0b5c 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -29,7 +29,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include <map> -#include <set> #include <stack> using namespace llvm; @@ -268,6 +267,8 @@ public: } // end anonymous namespace. namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) + LLVM_ATTRIBUTE_USED; raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { if (Val.isUndefined()) return OS << "undefined"; diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index fc7edc0..f130f30 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -606,7 +606,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, Type::getInt64Ty(V->getContext()))) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { - const SmallVector<unsigned, 4> &Indices = CE->getIndices(); + ArrayRef<unsigned> Indices = CE->getIndices(); if (Value *W = FindInsertedValue(CE->getOperand(0), Indices.begin(), Indices.end())) diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp deleted file mode 100644 index a0e6034..0000000 --- a/lib/Analysis/LiveValues.cpp +++ /dev/null @@ -1,200 +0,0 @@ -//===- LiveValues.cpp - Liveness information for LLVM IR Values. ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the implementation for the LLVM IR Value liveness -// analysis pass. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LiveValues.h" -#include "llvm/Instructions.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" -using namespace llvm; - -namespace llvm { - FunctionPass *createLiveValuesPass() { return new LiveValues(); } -} - -char LiveValues::ID = 0; -INITIALIZE_PASS_BEGIN(LiveValues, "live-values", - "Value Liveness Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_END(LiveValues, "live-values", - "Value Liveness Analysis", false, true) - -LiveValues::LiveValues() : FunctionPass(ID) { - initializeLiveValuesPass(*PassRegistry::getPassRegistry()); -} - -void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); - AU.addRequired<LoopInfo>(); - AU.setPreservesAll(); -} - -bool LiveValues::runOnFunction(Function &F) { - DT = &getAnalysis<DominatorTree>(); - LI = &getAnalysis<LoopInfo>(); - - // This pass' values are computed lazily, so there's nothing to do here. - - return false; -} - -void LiveValues::releaseMemory() { - Memos.clear(); -} - -/// isUsedInBlock - Test if the given value is used in the given block. -/// -bool LiveValues::isUsedInBlock(const Value *V, const BasicBlock *BB) { - Memo &M = getMemo(V); - return M.Used.count(BB); -} - -/// isLiveThroughBlock - Test if the given value is known to be -/// live-through the given block, meaning that the block is properly -/// dominated by the value's definition, and there exists a block -/// reachable from it that contains a use. This uses a conservative -/// approximation that errs on the side of returning false. -/// -bool LiveValues::isLiveThroughBlock(const Value *V, - const BasicBlock *BB) { - Memo &M = getMemo(V); - return M.LiveThrough.count(BB); -} - -/// isKilledInBlock - Test if the given value is known to be killed in -/// the given block, meaning that the block contains a use of the value, -/// and no blocks reachable from the block contain a use. This uses a -/// conservative approximation that errs on the side of returning false. -/// -bool LiveValues::isKilledInBlock(const Value *V, const BasicBlock *BB) { - Memo &M = getMemo(V); - return M.Killed.count(BB); -} - -/// getMemo - Retrieve an existing Memo for the given value if one -/// is available, otherwise compute a new one. -/// -LiveValues::Memo &LiveValues::getMemo(const Value *V) { - DenseMap<const Value *, Memo>::iterator I = Memos.find(V); - if (I != Memos.end()) - return I->second; - return compute(V); -} - -/// getImmediateDominator - A handy utility for the specific DominatorTree -/// query that we need here. -/// -static const BasicBlock *getImmediateDominator(const BasicBlock *BB, - const DominatorTree *DT) { - DomTreeNode *Node = DT->getNode(const_cast<BasicBlock *>(BB))->getIDom(); - return Node ? Node->getBlock() : 0; -} - -/// compute - Compute a new Memo for the given value. -/// -LiveValues::Memo &LiveValues::compute(const Value *V) { - Memo &M = Memos[V]; - - // Determine the block containing the definition. - const BasicBlock *DefBB; - // Instructions define values with meaningful live ranges. - if (const Instruction *I = dyn_cast<Instruction>(V)) - DefBB = I->getParent(); - // Arguments can be analyzed as values defined in the entry block. 
- else if (const Argument *A = dyn_cast<Argument>(V)) - DefBB = &A->getParent()->getEntryBlock(); - // Constants and other things aren't meaningful here, so just - // return having computed an empty Memo so that we don't come - // here again. The assumption here is that client code won't - // be asking about such values very often. - else - return M; - - // Determine if the value is defined inside a loop. This is used - // to track whether the value is ever used outside the loop, so - // it'll be set to null if the value is either not defined in a - // loop or used outside the loop in which it is defined. - const Loop *L = LI->getLoopFor(DefBB); - - // Track whether the value is used anywhere outside of the block - // in which it is defined. - bool LiveOutOfDefBB = false; - - // Examine each use of the value. - for (Value::const_use_iterator I = V->use_begin(), E = V->use_end(); - I != E; ++I) { - const User *U = *I; - const BasicBlock *UseBB = cast<Instruction>(U)->getParent(); - - // Note the block in which this use occurs. - M.Used.insert(UseBB); - - // If the use block doesn't have successors, the value can be - // considered killed. - if (succ_begin(UseBB) == succ_end(UseBB)) - M.Killed.insert(UseBB); - - // Observe whether the value is used outside of the loop in which - // it is defined. Switch to an enclosing loop if necessary. - for (; L; L = L->getParentLoop()) - if (L->contains(UseBB)) - break; - - // Search for live-through blocks. - const BasicBlock *BB; - if (const PHINode *PHI = dyn_cast<PHINode>(U)) { - // For PHI nodes, start the search at the incoming block paired with the - // incoming value, which must be dominated by the definition. - unsigned Num = PHI->getIncomingValueNumForOperand(I.getOperandNo()); - BB = PHI->getIncomingBlock(Num); - - // A PHI-node use means the value is live-out of it's defining block - // even if that block also contains the only use. - LiveOutOfDefBB = true; - } else { - // Otherwise just start the search at the use. - BB = UseBB; - - // Note if the use is outside the defining block. - LiveOutOfDefBB |= UseBB != DefBB; - } - - // Climb the immediate dominator tree from the use to the definition - // and mark all intermediate blocks as live-through. - for (; BB != DefBB; BB = getImmediateDominator(BB, DT)) { - if (BB != UseBB && !M.LiveThrough.insert(BB)) - break; - } - } - - // If the value is defined inside a loop and is not live outside - // the loop, then each exit block of the loop in which the value - // is used is a kill block. - if (L) { - SmallVector<BasicBlock *, 4> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - const BasicBlock *ExitingBlock = ExitingBlocks[i]; - if (M.Used.count(ExitingBlock)) - M.Killed.insert(ExitingBlock); - } - } - - // If the value was never used outside the block in which it was - // defined, it's killed in that block. 
- if (!LiveOutOfDefBB) - M.Killed.insert(DefBB); - - return M; -} diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 2ea27fb..ab34fd6 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -17,6 +17,7 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" using namespace llvm; /// AreEquivalentAddressValues - Test if A and B will obviously have the same diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 8e1a7bf..10e3f29 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -14,8 +14,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" +#include "llvm/DebugInfoProbe.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Timer.h" using namespace llvm; @@ -52,6 +54,20 @@ char PrintLoopPass::ID = 0; } //===----------------------------------------------------------------------===// +// DebugInfoProbe + +static DebugInfoProbeInfo *TheDebugProbe; +static void createDebugInfoProbe() { + if (TheDebugProbe) return; + + // Constructed the first time this is called. This guarantees that the + // object will be constructed, if -enable-debug-info-probe is set, + // before static globals, thus it will be destroyed before them. + static ManagedStatic<DebugInfoProbeInfo> DIP; + TheDebugProbe = &*DIP; +} + +//===----------------------------------------------------------------------===// // LPPassManager // @@ -223,6 +239,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { bool LPPassManager::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfo>(); bool Changed = false; + createDebugInfoProbe(); // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); @@ -254,19 +271,21 @@ bool LPPassManager::runOnFunction(Function &F) { // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); - dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); dumpRequiredSet(P); initializeAnalysisImpl(P); - + if (TheDebugProbe) + TheDebugProbe->initialize(P, F); { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); TimeRegion PassTimer(getPassTimer(P)); Changed |= P->runOnLoop(CurrentLoop, *this); } + if (TheDebugProbe) + TheDebugProbe->finalize(P, F); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 1ab18ca..769c68c 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -35,7 +35,13 @@ static bool isMallocCall(const CallInst *CI) { return false; Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc") + if (Callee == 0 || !Callee->isDeclaration()) + return false; + if (Callee->getName() != "malloc" && + Callee->getName() != "_Znwj" && // operator new(unsigned int) + Callee->getName() != "_Znwm" && // operator new(unsigned long) + Callee->getName() != "_Znaj" && // operator new[](unsigned int) + Callee->getName() != "_Znam") // operator new[](unsigned long) return false; // Check malloc prototype. 
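isMallocCall above now also accepts the Itanium-ABI mangled names of the four global operator new flavours. A self-contained sketch of just the name test (isNewLikeCalleeName is an invented helper name, and the real code goes on to check the prototype):

#include <cstring>

// Allocation entry points recognized purely by name; the mangled forms are
// the ones listed in the hunk above.
bool isNewLikeCalleeName(const char *Name) {
  static const char *const Names[] = {
    "malloc",
    "_Znwj", // operator new(unsigned int)
    "_Znwm", // operator new(unsigned long)
    "_Znaj", // operator new[](unsigned int)
    "_Znam", // operator new[](unsigned long)
  };
  for (const char *N : Names)
    if (std::strcmp(Name, N) == 0)
      return true;
  return false;
}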
@@ -189,7 +195,12 @@ const CallInst *llvm::isFreeCall(const Value *I) { if (!CI) return 0; Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "free") + if (Callee == 0 || !Callee->isDeclaration()) + return 0; + + if (Callee->getName() != "free" && + Callee->getName() != "_ZdlPv" && // operator delete(void*) + Callee->getName() != "_ZdaPv") // operator delete[](void*) return 0; // Check free prototype. diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 35043bd..ce7fab6 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Function.h" @@ -221,6 +222,96 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, return MemDepResult::getClobber(ScanIt); } +/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that +/// would fully overlap MemLoc if done as a wider legal integer load. +/// +/// MemLocBase, MemLocOffset are lazily computed here the first time the +/// base/offs of memloc is needed. +static bool +isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, + const Value *&MemLocBase, + int64_t &MemLocOffs, + const LoadInst *LI, + const TargetData *TD) { + // If we have no target data, we can't do this. + if (TD == 0) return false; + + // If we haven't already computed the base/offset of MemLoc, do so now. + if (MemLocBase == 0) + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, + LI, *TD); + return Size != 0; +} + +/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that +/// looks at a memory location for a load (specified by MemLocBase, Offs, +/// and Size) and compares it against a load. If the specified load could +/// be safely widened to a larger integer load that is 1) still efficient, +/// 2) safe for the target, and 3) would provide the specified memory +/// location value, then this function returns the size in bytes of the +/// load width to use. If not, this returns zero. +unsigned MemoryDependenceAnalysis:: +getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, + unsigned MemLocSize, const LoadInst *LI, + const TargetData &TD) { + // We can only extend non-volatile integer loads. + if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0; + + // Get the base of this load. + int64_t LIOffs = 0; + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); + + // If the two pointers are not based on the same pointer, we can't tell that + // they are related. + if (LIBase != MemLocBase) return 0; + + // Okay, the two values are based on the same pointer, but returned as + // no-alias. This happens when we have things like two byte loads at "P+1" + // and "P+3". Check to see if increasing the size of the "LI" load up to its + // alignment (or the largest native integer type) will allow us to load all + // the bits required by MemLoc. + + // If MemLoc is before LI, then no widening of LI will help us out. + if (MemLocOffs < LIOffs) return 0; + + // Get the alignment of the load in bytes. 
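// (Illustrative numbers, not from the imported source: for an i8 load at
// offset 0 with alignment 4, the widening loop below may try i16 and then
// i32; a query for the single byte at offset 3 is answered by the i32
// attempt, since 0 + 4 >= MemLocEnd == 4.)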
We assume that it is safe to load + // any legal integer up to this size without a problem. For example, if we're + // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can + // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it + // to i16. + unsigned LoadAlign = LI->getAlignment(); + + int64_t MemLocEnd = MemLocOffs+MemLocSize; + + // If no amount of rounding up will let MemLoc fit into LI, then bail out. + if (LIOffs+LoadAlign < MemLocEnd) return 0; + + // This is the size of the load to try. Start with the next larger power of + // two. + unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; + NewLoadByteSize = NextPowerOf2(NewLoadByteSize); + + while (1) { + // If this load size is bigger than our known alignment or would not fit + // into a native integer register, then we fail. + if (NewLoadByteSize > LoadAlign || + !TD.fitsInLegalInteger(NewLoadByteSize*8)) + return 0; + + // If a load of this width would include all of MemLoc, then we succeed. + if (LIOffs+NewLoadByteSize >= MemLocEnd) + return NewLoadByteSize; + + NewLoadByteSize <<= 1; + } + + return 0; +} + /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases @@ -229,58 +320,31 @@ MemDepResult MemoryDependenceAnalysis:: getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB) { - Value *InvariantTag = 0; - + const Value *MemLocBase = 0; + int64_t MemLocOffset = 0; + // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { Instruction *Inst = --ScanIt; - // If we're in an invariant region, no dependencies can be found before - // we pass an invariant-begin marker. - if (InvariantTag == Inst) { - InvariantTag = 0; - continue; - } - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Debug intrinsics don't (and can't) cause dependences. if (isa<DbgInfoIntrinsic>(II)) continue; - // If we pass an invariant-end marker, then we've just entered an - // invariant region and can start ignoring dependencies. - if (II->getIntrinsicID() == Intrinsic::invariant_end) { - // FIXME: This only considers queries directly on the invariant-tagged - // pointer, not on query pointers that are indexed off of them. It'd - // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc); - if (R == AliasAnalysis::MustAlias) - InvariantTag = II->getArgOperand(0); - - continue; - } - // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd - // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc); - if (R == AliasAnalysis::MustAlias) + // be nice to handle that at some point (the right approach is to use + // GetPointerBaseWithConstantOffset). + if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)), + MemLoc)) return MemDepResult::getDef(II); continue; } } - // If we're querying on a load and we're in an invariant region, we're done - // at this point. 
Nothing a load depends on can live in an invariant region. - // - // FIXME: this will prevent us from returning load/load must-aliases, so GVN - // won't remove redundant loads. - if (isLoad && InvariantTag) continue; - // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { @@ -288,27 +352,51 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - if (R == AliasAnalysis::NoAlias) - continue; - // May-alias loads don't depend on each other without a dependence. - if (isLoad && R != AliasAnalysis::MustAlias) + if (isLoad) { + if (R == AliasAnalysis::NoAlias) { + // If this is an over-aligned integer load (for example, + // "load i8* %P, align 4") see if it would obviously overlap with the + // queried location if widened to a larger load (e.g. if the queried + // location is 1 byte at P+1). If so, return it as a load/load + // clobber result, allowing the client to decide to widen the load if + // it wants to. + if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) + if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && + isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, + MemLocOffset, LI, TD)) + return MemDepResult::getClobber(Inst); + + continue; + } + + // Must aliased loads are defs of each other. + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + + // If we have a partial alias, then return this as a clobber for the + // client to handle. + if (R == AliasAnalysis::PartialAlias) + return MemDepResult::getClobber(Inst); + + // Random may-alias loads don't depend on each other without a + // dependence. + continue; + } + + // Stores don't depend on other no-aliased accesses. + if (R == AliasAnalysis::NoAlias) continue; // Stores don't alias loads from read-only memory. - if (!isLoad && AA->pointsToConstantMemory(LoadLoc)) + if (AA->pointsToConstantMemory(LoadLoc)) continue; - // Stores depend on may and must aliased loads, loads depend on must-alias - // loads. + // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // There can't be stores to the value we care about inside an - // invariant region. - if (InvariantTag) continue; - // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. @@ -341,8 +429,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, (isa<CallInst>(Inst) && extractMallocCall(Inst))) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - if (AccessPtr == Inst || - AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias) + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); continue; } @@ -353,9 +440,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // If the call has no effect on the queried pointer, just ignore it. continue; case AliasAnalysis::Mod: - // If we're in an invariant region, we can ignore calls that ONLY - // modify the pointer. 
- if (InvariantTag) continue; return MemDepResult::getClobber(Inst); case AliasAnalysis::Ref: // If the call is known to never store to the pointer, and if this is a diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 93da5a4..70dcd0d 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index 5d3f6bb..7c584da 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -38,13 +38,10 @@ #include "llvm/Support/TypeBuilder.h" #include "llvm/Support/raw_ostream.h" -#include <map> #include <queue> -#include <set> #include <stack> #include <string> #include <utility> -#include <vector> #include <sstream> using namespace llvm; @@ -286,7 +283,7 @@ void BallLarusDag::calculatePathNumbers() { BallLarusEdge* exitEdge = addEdge(node, getExit(), 0); exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); - // Counters to handle the possibilty of a multi-graph + // Counters to handle the possibility of a multi-graph BasicBlock* oldTarget = 0; unsigned duplicateNumber = 0; diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index c549773..0ae734e 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -124,7 +124,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { ProfilePathEdgeVector* pev = currentPath->getPathEdges(); DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": " << currentPath->getCount() << "\n"); - // setup the entry edge (normally path profiling doens't care about this) + // setup the entry edge (normally path profiling doesn't care about this) if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] += currentPath->getCount(); diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index 3f0deab..6ed2729 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -28,7 +28,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// char PostDominatorTree::ID = 0; -char PostDominanceFrontier::ID = 0; INITIALIZE_PASS(PostDominatorTree, "postdomtree", "Post-Dominator Tree Construction", true, true) @@ -50,53 +49,3 @@ FunctionPass* llvm::createPostDomTree() { return new PostDominatorTree(); } -//===----------------------------------------------------------------------===// -// PostDominanceFrontier Implementation -//===----------------------------------------------------------------------===// - -INITIALIZE_PASS_BEGIN(PostDominanceFrontier, "postdomfrontier", - "Post-Dominance Frontier Construction", true, true) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) -INITIALIZE_PASS_END(PostDominanceFrontier, "postdomfrontier", - "Post-Dominance Frontier Construction", true, true) - -const DominanceFrontier::DomSetType & -PostDominanceFrontier::calculate(const PostDominatorTree &DT, - const DomTreeNode *Node) { - // Loop over CFG successors to calculate DFlocal[Node] - BasicBlock *BB = Node->getBlock(); - DomSetType &S = Frontiers[BB]; // The new set to fill in... 
- if (getRoots().empty()) return S; - - if (BB) - for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); - SI != SE; ++SI) { - BasicBlock *P = *SI; - // Does Node immediately dominate this predecessor? - DomTreeNode *SINode = DT[P]; - if (SINode && SINode->getIDom() != Node) - S.insert(P); - } - - // At this point, S is DFlocal. Now we union in DFup's of our children... - // Loop through and visit the nodes that Node immediately dominates (Node's - // children in the IDomTree) - // - for (DomTreeNode::const_iterator - NI = Node->begin(), NE = Node->end(); NI != NE; ++NI) { - DomTreeNode *IDominee = *NI; - const DomSetType &ChildDF = calculate(DT, IDominee); - - DomSetType::const_iterator CDFI = ChildDF.begin(), CDFE = ChildDF.end(); - for (; CDFI != CDFE; ++CDFI) { - if (!DT.properlyDominates(Node, DT[*CDFI])) - S.insert(*CDFI); - } - } - - return S; -} - -FunctionPass* llvm::createPostDomFrontier() { - return new PostDominanceFrontier(); -} diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index 667ee1c..b594e2b 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -140,7 +140,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { // loop, thus the edge is a backedge, continue and do not check if the // value is valid. if (BBisHeader && BBLoop->contains(*bbi)) { - printEdgeError(edge, "but is backedge, continueing"); + printEdgeError(edge, "but is backedge, continuing"); continue; } // If the edges value is missing (and this is no loop header, and this is diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 36f211e..173de2c 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -309,9 +309,9 @@ void ProfileInfoT<Function,BasicBlock>:: removeEdge(oldedge); } -/// Replaces all occurences of RmBB in the ProfilingInfo with DestBB. +/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB. /// This checks all edges of the function the blocks reside in and replaces the -/// occurences of RmBB with DestBB. +/// occurrences of RmBB with DestBB. template<> void ProfileInfoT<Function,BasicBlock>:: replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { @@ -812,7 +812,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { } if (iw < 0) continue; - // Check the recieving end of the path if it can handle the flow. + // Check the receiving end of the path if it can handle the flow. double ow = getExecutionCount(Dest); Processed.clear(); for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp index 25481b2..eaa38da 100644 --- a/lib/Analysis/ProfileInfoLoader.cpp +++ b/lib/Analysis/ProfileInfoLoader.cpp @@ -19,7 +19,6 @@ #include "llvm/Support/raw_ostream.h" #include <cstdio> #include <cstdlib> -#include <map> using namespace llvm; // ByteSwap - Byteswap 'Var' if 'Really' is true. diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index e2f6a8b..52753cb 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -41,16 +41,15 @@ VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo), STATISTIC(numRegions, "The # of regions"); STATISTIC(numSimpleRegions, "The # of simple regions"); -//===----------------------------------------------------------------------===// -/// PrintStyle - Print region in difference ways. 
-enum PrintStyle { PrintNone, PrintBB, PrintRN }; - -static cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden, +static cl::opt<enum Region::PrintStyle> printStyle("print-region-style", + cl::Hidden, cl::desc("style of printing regions"), cl::values( - clEnumValN(PrintNone, "none", "print no details"), - clEnumValN(PrintBB, "bb", "print regions in detail with block_iterator"), - clEnumValN(PrintRN, "rn", "print regions in detail with element_iterator"), + clEnumValN(Region::PrintNone, "none", "print no details"), + clEnumValN(Region::PrintBB, "bb", + "print regions in detail with block_iterator"), + clEnumValN(Region::PrintRN, "rn", + "print regions in detail with element_iterator"), clEnumValEnd)); //===----------------------------------------------------------------------===// /// Region Implementation @@ -413,7 +412,8 @@ Region *Region::getExpandedRegion() const { return new Region(getEntry(), R->getExit(), RI, DT); } -void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { +void Region::print(raw_ostream &OS, bool print_tree, unsigned level, + enum PrintStyle Style) const { if (print_tree) OS.indent(level*2) << "[" << level << "] " << getNameStr(); else @@ -422,14 +422,14 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { OS << "\n"; - if (printStyle != PrintNone) { + if (Style != PrintNone) { OS.indent(level*2) << "{\n"; OS.indent(level*2 + 2); - if (printStyle == PrintBB) { + if (Style == PrintBB) { for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I) OS << **I << ", "; // TODO: remove the last "," - } else if (printStyle == PrintRN) { + } else if (Style == PrintRN) { for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) OS << **I << ", "; // TODO: remove the last ", } @@ -439,14 +439,14 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { if (print_tree) for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI) - (*RI)->print(OS, print_tree, level+1); + (*RI)->print(OS, print_tree, level+1, Style); - if (printStyle != PrintNone) + if (Style != PrintNone) OS.indent(level*2) << "} \n"; } void Region::dump() const { - print(dbgs(), true, getDepth()); + print(dbgs(), true, getDepth(), printStyle.getValue()); } void Region::clearNodeCache() { @@ -714,7 +714,7 @@ void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const { void RegionInfo::print(raw_ostream &OS, const Module *) const { OS << "Region tree:\n"; - TopLevelRegion->print(OS, true, 0); + TopLevelRegion->print(OS, true, 0, printStyle.getValue()); OS << "End region tree\n"; } diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 0cf0f90..a1730b0 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -70,6 +70,32 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { G->getTopLevelRegion()); } + std::string getEdgeAttributes(RegionNode *srcNode, + GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) { + + RegionNode *destNode = *CI; + + if (srcNode->isSubRegion() || destNode->isSubRegion()) + return ""; + + // In case of a backedge, do not use it to define the layout of the nodes. 
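// A dot edge attributed constraint=false does not influence node ranking.
// The walk below climbs to the largest region whose entry block is destBB;
// if that region also contains srcBB, the edge closes a cycle back to a
// region entry, so it is drawn without constraining the layout hierarchy.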
+ BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>(); + BasicBlock *destBB = destNode->getNodeAs<BasicBlock>(); + + Region *R = RI->getRegionFor(destBB); + + while (R && R->getParent()) + if (R->getParent()->getEntry() == destBB) + R = R->getParent(); + else + break; + + if (R->getEntry() == destBB && R->contains(srcBB)) + return "constraint=false"; + + return ""; + } + // Print the cluster of the subregions. This groups the single basic blocks // and adds a different background color for each group. static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW, diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 62244cc..bab4619 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -157,10 +157,13 @@ void SCEV::print(raw_ostream &OS) const { for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) OS << ",+," << *AR->getOperand(i); OS << "}<"; - if (AR->hasNoUnsignedWrap()) + if (AR->getNoWrapFlags(FlagNUW)) OS << "nuw><"; - if (AR->hasNoSignedWrap()) + if (AR->getNoWrapFlags(FlagNSW)) OS << "nsw><"; + if (AR->getNoWrapFlags(FlagNW) && + !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) + OS << "nw><"; WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false); OS << ">"; return; @@ -203,7 +206,7 @@ void SCEV::print(raw_ostream &OS) const { OS << "alignof(" << *AllocTy << ")"; return; } - + const Type *CTy; Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo)) { @@ -212,7 +215,7 @@ void SCEV::print(raw_ostream &OS) const { OS << ")"; return; } - + // Otherwise just print it normally. WriteAsOperand(OS, U->getValue(), false); return; @@ -830,7 +833,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Operands.push_back(S); } if (!hasTrunc) - return getAddExpr(Operands, false, false); + return getAddExpr(Operands); UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. } @@ -845,7 +848,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Operands.push_back(S); } if (!hasTrunc) - return getMulExpr(Operands, false, false); + return getMulExpr(Operands); UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. } @@ -854,7 +857,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); - return getAddRecExpr(Operands, AddRec->getLoop()); + return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } // As a special case, fold trunc(undef) to undef. We don't want to @@ -926,10 +929,10 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. - if (AR->hasNoUnsignedWrap()) + if (AR->getNoWrapFlags(SCEV::FlagNUW)) return getAddRecExpr(getZeroExtendExpr(Start, Ty), getZeroExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. 
// Note that this serves two purposes: It filters out loops that are @@ -959,12 +962,14 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, getAddExpr(getZeroExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getZeroExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NUW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr(getZeroExtendExpr(Start, Ty), getZeroExtendExpr(Step, Ty), - L); - + L, AR->getNoWrapFlags()); + } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); @@ -973,11 +978,15 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, getAddExpr(getZeroExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getSignExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NW, which is propagated to this AddRec. + // Negative step causes unsigned wrap, but it still can't self-wrap. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr(getZeroExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } // If the backedge is guarded by a comparison with the pre-inc value @@ -990,22 +999,29 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, - AR->getPostIncExpr(*this), N))) + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NUW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr(getZeroExtendExpr(Start, Ty), getZeroExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRange(Step).getSignedMin()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, - AR->getPostIncExpr(*this), N))) + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NW, which is propagated to this AddRec. + // Negative step causes unsigned wrap, but it still can't self-wrap. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr(getZeroExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } } } @@ -1080,10 +1096,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. - if (AR->hasNoSignedWrap()) + if (AR->getNoWrapFlags(SCEV::FlagNSW)) return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. 
// Note that this serves two purposes: It filters out loops that are @@ -1113,12 +1129,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, getAddExpr(getSignExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getSignExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); - + L, AR->getNoWrapFlags()); + } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); @@ -1127,11 +1145,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, getAddExpr(getSignExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getZeroExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getZeroExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } // If the backedge is guarded by a comparison with the pre-inc value @@ -1144,22 +1165,28 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, - AR->getPostIncExpr(*this), N))) + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - getSignedRange(Step).getSignedMin()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, - AR->getPostIncExpr(*this), N))) + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } } } @@ -1213,7 +1240,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) Ops.push_back(getAnyExtendExpr(*I, Ty)); - return getAddRecExpr(Ops, AR->getLoop()); + return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } // As a special case, fold anyext(undef) to undef. We don't want to @@ -1334,7 +1361,9 @@ namespace { /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. 
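The overflow checks in getZeroExtendExpr and getSignExtendExpr above compare the final value of the recurrence folded in the narrow type against the same expression built from widened operands; only a match lets the nuw/nsw flag be cached on the AddRec. The same comparison on concrete numbers (all values here are invented for illustration):

#include <cassert>
#include <cstdint>

int main() {
  // {200,+,10} over a backedge-taken count of 10, in i8 widened to i16.
  uint8_t Start = 200, Step = 10, BECount = 10;
  uint16_t Narrow = uint8_t(Start + Step * BECount);            // folded in i8: wraps to 44
  uint16_t Wide   = uint16_t(Start) + uint16_t(Step) * BECount; // folded in i16: 300
  assert(Narrow != Wide); // mismatch: the addrec may wrap, so no flag is cached
  return 0;
}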
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, - bool HasNUW, bool HasNSW) { + SCEV::NoWrapFlags Flags) { + assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && + "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1344,8 +1373,11 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, "SCEVAddExpr operand types don't match!"); #endif - // If HasNSW is true and all the operands are non-negative, infer HasNUW. - if (!HasNUW && HasNSW) { + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { bool All = true; for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), E = Ops.end(); I != E; ++I) @@ -1353,7 +1385,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, All = false; break; } - if (All) HasNUW = true; + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); } // Sort by complexity, this groups all similar expression types together. @@ -1404,7 +1436,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, FoundMatch = true; } if (FoundMatch) - return getAddExpr(Ops, HasNUW, HasNSW); + return getAddExpr(Ops, Flags); // Check for truncates. If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be @@ -1454,7 +1486,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } if (Ok) { // Evaluate the expression in the larger type. - const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW); + const SCEV *Fold = getAddExpr(LargeOps, Flags); // If it folds to something simple, use it. Otherwise, don't. if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) return getTruncateExpr(Fold, DstType); @@ -1625,9 +1657,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. - const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, - HasNUW && AddRec->hasNoUnsignedWrap(), - HasNSW && AddRec->hasNoSignedWrap()); + // Always propagate NW. + Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1668,7 +1700,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; } - Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop); + // Step size has changed, so we cannot guarantee no self-wraparound. + Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); return getAddExpr(Ops); } @@ -1692,15 +1725,16 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } - if (HasNUW) S->setHasNoUnsignedWrap(true); - if (HasNSW) S->setHasNoSignedWrap(true); + S->setNoWrapFlags(Flags); return S; } /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. 
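getAddExpr above upgrades a lone nuw or nsw to both flags when every operand is known non-negative. For two-operand addition the nsw-to-nuw direction can be checked exhaustively in eight bits; this is a sketch of the underlying arithmetic fact, not of the SCEV code:

#include <cassert>

int main() {
  for (int A = 0; A <= 127; ++A)   // all non-negative i8 values
    for (int B = 0; B <= 127; ++B) {
      bool NSW = A + B <= 127;     // signed i8 add does not overflow
      bool NUW = A + B <= 255;     // unsigned i8 add does not wrap
      if (NSW)
        assert(NUW);               // nsw + non-negative operands implies nuw
    }
  return 0;
}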
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, - bool HasNUW, bool HasNSW) { + SCEV::NoWrapFlags Flags) { + assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && + "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1710,8 +1744,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, "SCEVMulExpr operand types don't match!"); #endif - // If HasNSW is true and all the operands are non-negative, infer HasNUW. - if (!HasNUW && HasNSW) { + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { bool All = true; for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), E = Ops.end(); I != E; ++I) @@ -1719,7 +1756,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, All = false; break; } - if (All) HasNUW = true; + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); } // Sort by complexity, this groups all similar expression types together. @@ -1759,12 +1796,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, } else if (Ops[0]->isAllOnesValue()) { // If we have a mul by -1 of an add, try distributing the -1 among the // add operands. - if (Ops.size() == 2) + if (Ops.size() == 2) { if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) { SmallVector<const SCEV *, 4> NewOps; bool AnyFolded = false; - for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) { + for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), + E = Add->op_end(); I != E; ++I) { const SCEV *Mul = getMulExpr(Ops[0], *I); if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true; NewOps.push_back(Mul); @@ -1772,6 +1809,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, if (AnyFolded) return getAddExpr(NewOps); } + else if (const SCEVAddRecExpr * + AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) { + // Negation preserves a recurrence's no self-wrap property. + SmallVector<const SCEV *, 4> Operands; + for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(), + E = AddRec->op_end(); I != E; ++I) { + Operands.push_back(getMulExpr(Ops[0], *I)); + } + return getAddRecExpr(Operands, AddRec->getLoop(), + AddRec->getNoWrapFlags(SCEV::FlagNW)); + } + } } if (Ops.size() == 1) @@ -1831,9 +1880,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // Build the new addrec. Propagate the NUW and NSW flags if both the // outer mul and the inner addrec are guaranteed to have no overflow. - const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, - HasNUW && AddRec->hasNoUnsignedWrap(), - HasNSW && AddRec->hasNoSignedWrap()); + // + // No self-wrap cannot be guaranteed after changing the step size, but + // will be inferred if either NUW or NSW is true. + Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); + const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. 
if (Ops.size() == 1) return NewRec; @@ -1869,7 +1920,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, getMulExpr(G, B), getMulExpr(B, D)); const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, - F->getLoop()); + F->getLoop(), + SCEV::FlagAnyWrap); if (Ops.size() == 2) return NewAddRec; Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec); Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; @@ -1897,8 +1949,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } - if (HasNUW) S->setHasNoUnsignedWrap(true); - if (HasNSW) S->setHasNoSignedWrap(true); + S->setNoWrapFlags(Flags); return S; } @@ -1938,11 +1989,12 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), - AR->getLoop())) { + AR->getLoop(), SCEV::FlagAnyWrap)) { SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); - return getAddRecExpr(Operands, AR->getLoop()); + return getAddRecExpr(Operands, AR->getLoop(), + SCEV::FlagNW); } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { @@ -1963,7 +2015,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. - if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) { SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); @@ -2006,27 +2058,26 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. -const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, - const SCEV *Step, const Loop *L, - bool HasNUW, bool HasNSW) { +const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, + const Loop *L, + SCEV::NoWrapFlags Flags) { SmallVector<const SCEV *, 4> Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) if (StepChrec->getLoop() == L) { Operands.append(StepChrec->op_begin(), StepChrec->op_end()); - return getAddRecExpr(Operands, L); + return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); } Operands.push_back(Step); - return getAddRecExpr(Operands, L, HasNUW, HasNSW); + return getAddRecExpr(Operands, L, Flags); } /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. 
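Two details in the getUDivExpr hunks above are easy to miss. Dividing {a,+,s} through by a constant is guarded by a zero-extension identity proving that no element straddles a multiple of the divisor, and the (A+B)/C fold previously pattern-matched SCEVAddRecExpr where it meant SCEVAddExpr, so it never fired on plain adds; this import corrects the cast. A numeric sanity check of both folds, under the assumption that every division involved is exact:

    #include <cassert>
    #include <cstdint>

    int main() {
      // {8,+,4} /u 4 == {2,+,1}: every element is divisible by 4.
      for (uint64_t i = 0; i != 16; ++i) {
        uint64_t elem = 8 + 4 * i;
        assert(elem / 4 == 2 + 1 * i);
      }
      // (A + B) /u C == A/C + B/C when both halves divide exactly.
      uint64_t A = 32, B = 16, C = 8;
      assert((A + B) / C == A / C + B / C);
    }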
const SCEV * ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, - const Loop *L, - bool HasNUW, bool HasNSW) { + const Loop *L, SCEV::NoWrapFlags Flags) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG const Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); @@ -2040,7 +2091,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, if (Operands.back()->isZero()) { Operands.pop_back(); - return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X + return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X } // It's tempting to want to call getMaxBackedgeTakenCount here and // use that information to infer NUW and NSW flags. However, computing a // BE count requires calling getAddRecExpr, so we may not yet have a // meaningful BE count at this point (and if we don't, we'd be stuck // with a SCEVCouldNotCompute as the cached BE count). - // If HasNSW is true and all the operands are non-negative, infer HasNUW. - if (!HasNUW && HasNSW) { + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { bool All = true; for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(), E = Operands.end(); I != E; ++I) @@ -2058,7 +2112,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, All = false; break; } - if (All) HasNUW = true; + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); } // Canonicalize nested AddRecs by nesting them in order of loop depth. @@ -2081,16 +2135,29 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, break; } if (AllInvariant) { - NestedOperands[0] = getAddRecExpr(Operands, L); + // Create a recurrence for the outer loop with the same step size. + // + // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the + // inner recurrence has the same property. + SCEV::NoWrapFlags OuterFlags = + maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); + + NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); AllInvariant = true; for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { AllInvariant = false; break; } - if (AllInvariant) + if (AllInvariant) { // Ok, both add recurrences are valid after the transformation. - return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW); + // + // The inner recurrence keeps its NW flag but only keeps NUW/NSW if + // the outer recurrence has the same property. + SCEV::NoWrapFlags InnerFlags = + maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags); + return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags); + } } // Reset Operands to its original state. Operands[0] = NestedAR; @@ -2114,8 +2181,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); } - if (HasNUW) S->setHasNoUnsignedWrap(true); - if (HasNSW) S->setHasNoSignedWrap(true); + S->setNoWrapFlags(Flags); return S; } @@ -2510,17 +2576,17 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { return getMinusSCEV(AllOnes, V); } -/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1, -/// and thus the HasNUW and HasNSW bits apply to the resultant add, not -/// whether the sub would have overflowed. +/// getMinusSCEV - Return LHS-RHS.
Minus is represented in SCEV as A+B*-1. const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - bool HasNUW, bool HasNSW) { + SCEV::NoWrapFlags Flags) { + assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW"); + // Fast path: X - X --> 0. if (LHS == RHS) return getConstant(LHS->getType(), 0); // X - Y --> X + -Y - return getAddExpr(LHS, getNegativeSCEV(RHS), HasNUW, HasNSW); + return getAddExpr(LHS, getNegativeSCEV(RHS), Flags); } /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the @@ -2652,6 +2718,36 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, return getUMinExpr(PromotedLHS, PromotedRHS); } +/// getPointerBase - Transitively follow the chain of pointer-type operands +/// until reaching a SCEV that does not have a single pointer operand. This +/// returns a SCEVUnknown pointer for well-formed pointer-type expressions, +/// but corner cases do exist. +const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { + // A pointer operand may evaluate to a nonpointer expression, such as null. + if (!V->getType()->isPointerTy()) + return V; + + if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) { + return getPointerBase(Cast->getOperand()); + } + else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { + const SCEV *PtrOp = 0; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + if ((*I)->getType()->isPointerTy()) { + // Cannot find the base of an expression with multiple pointer operands. + if (PtrOp) + return V; + PtrOp = *I; + } + } + if (!PtrOp) + return V; + return getPointerBase(PtrOp); + } + return V; +} + /// PushDefUseChildren - Push users of the given Instruction /// onto the given Worklist. static void @@ -2773,44 +2869,34 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (isLoopInvariant(Accum, L) || (isa<SCEVAddRecExpr>(Accum) && cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { - bool HasNUW = false; - bool HasNSW = false; + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; // If the increment doesn't overflow, then neither the addrec nor // the post-increment will overflow. if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) { if (OBO->hasNoUnsignedWrap()) - HasNUW = true; + Flags = setFlags(Flags, SCEV::FlagNUW); if (OBO->hasNoSignedWrap()) - HasNSW = true; - } else if (const GEPOperator *GEP = - dyn_cast<GEPOperator>(BEValueV)) { - // If the increment is a GEP, then we know it won't perform a - // signed overflow, because the address space cannot be - // wrapped around. - // - // NOTE: This isn't strictly true, because you could have an - // object straddling the 2G address boundary in a 32-bit address - // space (for example). We really want to model this as a "has - // no signed/unsigned wrap" where the base pointer is treated as - // unsigned and the increment is known to not have signed - // wrapping. - // - // This is a highly theoretical concern though, and this is good - // enough for all cases we know of at this point. :) - // - HasNSW |= GEP->isInBounds(); + Flags = setFlags(Flags, SCEV::FlagNSW); + } else if (const GEPOperator *GEP = + dyn_cast<GEPOperator>(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. 
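getPointerBase, introduced above, strips casts and walks through any n-ary expression with exactly one pointer-typed operand, giving up when it finds zero or several pointer operands; for well-formed pointer expressions the walk typically bottoms out at a SCEVUnknown. A standalone model of the traversal over a toy expression type (not the SCEV class hierarchy):

    #include <cassert>
    #include <vector>

    struct Expr {
      bool IsPointer;
      bool IsCast;                   // unary cast: single operand
      std::vector<const Expr*> Ops;
    };

    const Expr *getPointerBase(const Expr *V) {
      if (!V->IsPointer)
        return V;
      if (V->IsCast)
        return getPointerBase(V->Ops[0]);
      const Expr *PtrOp = nullptr;
      for (const Expr *Op : V->Ops) {
        if (!Op->IsPointer)
          continue;
        if (PtrOp)                   // two pointer operands: no unique base
          return V;
        PtrOp = Op;
      }
      return PtrOp ? getPointerBase(PtrOp) : V;
    }

    int main() {
      Expr Base{true, false, {}};            // e.g. a SCEVUnknown pointer
      Expr Off{false, false, {}};            // integer offset
      Expr Add{true, false, {&Base, &Off}};  // base + offset
      Expr Cast{true, true, {&Add}};         // cast of the sum
      assert(getPointerBase(&Cast) == &Base);
    }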
+ if (GEP->isInBounds()) + Flags = setFlags(Flags, SCEV::FlagNW); } const SCEV *StartVal = getSCEV(StartValueV); - const SCEV *PHISCEV = - getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); // Since the no-wrap flags are on the increment, they apply to the // post-incremented value as well. if (isLoopInvariant(Accum, L)) (void)getAddRecExpr(getAddExpr(StartVal, Accum), - Accum, L, HasNUW, HasNSW); + Accum, L, Flags); // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the @@ -2834,8 +2920,11 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // initial step of the addrec evolution. if (StartVal == getMinusSCEV(AddRec->getOperand(0), AddRec->getOperand(1))) { + // FIXME: For constant StartVal, we should be able to infer + // no-wrap flags. const SCEV *PHISCEV = - getAddRecExpr(StartVal, AddRec->getOperand(1), L); + getAddRecExpr(StartVal, AddRec->getOperand(1), L, + SCEV::FlagAnyWrap); // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the @@ -2899,8 +2988,9 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, /*NUW*/ false, - /*NSW*/ isInBounds); + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, + isInBounds ? SCEV::FlagNSW : + SCEV::FlagAnyWrap); // Add the element offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, LocalOffset); @@ -2911,8 +3001,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { const SCEV *BaseS = getSCEV(Base); // Add the total offset from all the GEP indices to the base. - return getAddExpr(BaseS, TotalOffset, /*NUW*/ false, - /*NSW*/ isInBounds); + return getAddExpr(BaseS, TotalOffset, + isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3074,7 +3164,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { // If there's no unsigned wrap, the value will never be less than its // initial value. - if (AddRec->hasNoUnsignedWrap()) + if (AddRec->getNoWrapFlags(SCEV::FlagNUW)) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) if (!C->getValue()->isZero()) ConservativeResult = @@ -3216,7 +3306,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. 
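In the createNodeForGEP hunk above, each index contributes index times element size to a running offset, and the final address is the base plus that total; the inbounds keyword is what licenses tagging those multiplies and the final add with FlagNSW, otherwise the patch falls back to FlagAnyWrap. A small sketch of the offset arithmetic, with assumed sizes for a hypothetical array of int[4] rows of 4-byte ints:

    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Sum of scaled indices, the integer core of GEP address computation.
    int64_t gepOffset(const std::vector<std::pair<int64_t, int64_t>> &IdxAndSize) {
      int64_t Total = 0;
      for (auto &IS : IdxAndSize)
        Total += IS.first * IS.second;   // index * element size
      return Total;
    }

    int main() {
      // &a[2][3] with a: int[N][4] of 4-byte ints: 2*16 + 3*4 == 44 bytes.
      assert(gepOffset({{2, 16}, {3, 4}}) == 44);
    }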
- if (AddRec->hasNoSignedWrap()) { + if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) { bool AllNonNeg = true; bool AllNonPos = true; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { @@ -3349,7 +3439,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { SmallVector<const SCEV *, 4> MulOps; MulOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); - Op->getValueID() == Instruction::Mul + Value::InstructionVal; + Op->getValueID() == Instruction::Mul + Value::InstructionVal; Op = U->getOperand(0)) { U = cast<Operator>(Op); MulOps.push_back(getSCEV(U->getOperand(1))); @@ -3411,10 +3501,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // transfer the no-wrap flags, since an or won't introduce a wrap. if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); - if (OldAR->hasNoUnsignedWrap()) - const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true); - if (OldAR->hasNoSignedWrap()) - const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true); + const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( + OldAR->getNoWrapFlags()); } return S; } @@ -3700,19 +3788,20 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Pair.second) return Pair.first->second; - BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L); - if (BECount.Exact != getCouldNotCompute()) { - assert(isLoopInvariant(BECount.Exact, L) && - isLoopInvariant(BECount.Max, L) && + BackedgeTakenInfo Result = getCouldNotCompute(); + BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L); + if (Computed.Exact != getCouldNotCompute()) { + assert(isLoopInvariant(Computed.Exact, L) && + isLoopInvariant(Computed.Max, L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; // Update the value in the map. - Pair.first->second = BECount; + Result = Computed; } else { - if (BECount.Max != getCouldNotCompute()) + if (Computed.Max != getCouldNotCompute()) // Update the value in the map. - Pair.first->second = BECount; + Result = Computed; if (isa<PHINode>(L->getHeader()->begin())) // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; @@ -3723,7 +3812,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. - if (BECount.hasAnyInfo()) { + if (Computed.hasAnyInfo()) { SmallVector<Instruction *, 16> Worklist; PushLoopPHIs(L, Worklist); @@ -3754,7 +3843,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { PushDefUseChildren(I, Worklist); } } - return Pair.first->second; + + // Re-lookup the insert position, since the call to + // ComputeBackedgeTakenCount above could result in a + // recursive call to getBackedgeTakenInfo (on a different + // loop), which would invalidate the iterator computed + // earlier. + return BackedgeTakenCounts.find(L)->second = Result; } /// forgetLoop - This method should be called by the client when it has @@ -4022,105 +4117,6 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); } -static const SCEVAddRecExpr * -isSimpleUnwrappingAddRec(const SCEV *S, const Loop *L) { - const SCEVAddRecExpr *SA = dyn_cast<SCEVAddRecExpr>(S); - - // The SCEV must be an addrec of this loop.
- if (!SA || SA->getLoop() != L || !SA->isAffine()) - return 0; - - // The SCEV must be known to not wrap in some way to be interesting. - if (!SA->hasNoUnsignedWrap() && !SA->hasNoSignedWrap()) - return 0; - - // The stride must be a constant so that we know if it is striding up or down. - if (!isa<SCEVConstant>(SA->getOperand(1))) - return 0; - return SA; -} - -/// getMinusSCEVForExitTest - When considering an exit test for a loop with a -/// "x != y" exit test, we turn this into a computation that evaluates x-y != 0, -/// and this function returns the expression to use for x-y. We know and take -/// advantage of the fact that this subtraction is only being used in a -/// comparison by zero context. -/// -static const SCEV *getMinusSCEVForExitTest(const SCEV *LHS, const SCEV *RHS, - const Loop *L, ScalarEvolution &SE) { - // If either LHS or RHS is an AddRec SCEV (of this loop) that is known to not - // wrap (either NSW or NUW), then we know that the value will either become - // the other one (and thus the loop terminates), that the loop will terminate - // through some other exit condition first, or that the loop has undefined - // behavior. This information is useful when the addrec has a stride that is - // != 1 or -1, because it means we can't "miss" the exit value. - // - // In any of these three cases, it is safe to turn the exit condition into a - // "counting down" AddRec (to zero) by subtracting the two inputs as normal, - // but since we know that the "end cannot be missed" we can force the - // resulting AddRec to be a NUW addrec. Since it is counting down, this means - // that the AddRec *cannot* pass zero. - - // See if LHS and RHS are addrec's we can handle. - const SCEVAddRecExpr *LHSA = isSimpleUnwrappingAddRec(LHS, L); - const SCEVAddRecExpr *RHSA = isSimpleUnwrappingAddRec(RHS, L); - - // If neither addrec is interesting, just return a minus. - if (RHSA == 0 && LHSA == 0) - return SE.getMinusSCEV(LHS, RHS); - - // If only one of LHS and RHS are an AddRec of this loop, make sure it is LHS. - if (RHSA && LHSA == 0) { - // Safe because a-b === b-a for comparisons against zero. - std::swap(LHS, RHS); - std::swap(LHSA, RHSA); - } - - // Handle the case when only one is advancing in a non-overflowing way. - if (RHSA == 0) { - // If RHS is loop varying, then we can't predict when LHS will cross it. - if (!SE.isLoopInvariant(RHS, L)) - return SE.getMinusSCEV(LHS, RHS); - - // If LHS has a positive stride, then we compute RHS-LHS, because the loop - // is counting up until it crosses RHS (which must be larger than LHS). If - // it is negative, we compute LHS-RHS because we're counting down to RHS. - const ConstantInt *Stride = - cast<SCEVConstant>(LHSA->getOperand(1))->getValue(); - if (Stride->getValue().isNegative()) - std::swap(LHS, RHS); - - return SE.getMinusSCEV(RHS, LHS, true /*HasNUW*/); - } - - // If both LHS and RHS are interesting, we have something like: - // a+i*4 != b+i*8. - const ConstantInt *LHSStride = - cast<SCEVConstant>(LHSA->getOperand(1))->getValue(); - const ConstantInt *RHSStride = - cast<SCEVConstant>(RHSA->getOperand(1))->getValue(); - - // If the strides are equal, then this is just a (complex) loop invariant - // comparison of a and b. - if (LHSStride == RHSStride) - return SE.getMinusSCEV(LHSA->getStart(), RHSA->getStart()); - - // If the signs of the strides differ, then the negative stride is counting - // down to the positive stride. 
- if (LHSStride->getValue().isNegative() != RHSStride->getValue().isNegative()){ - if (RHSStride->getValue().isNegative()) - std::swap(LHS, RHS); - } else { - // If LHS's stride is smaller than RHS's stride, then "b" must be less than - // "a" and "b" is RHS is counting up (catching up) to LHS. This is true - // whether the strides are positive or negative. - if (RHSStride->getValue().slt(LHSStride->getValue())) - std::swap(LHS, RHS); - } - - return SE.getMinusSCEV(LHS, RHS, true /*HasNUW*/); -} - /// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. @@ -4180,8 +4176,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEVForExitTest(LHS, RHS, L, - *this), L); + BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); if (BTI.hasAnyInfo()) return BTI; break; } @@ -4706,7 +4701,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { for (++i; i != e; ++i) NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); - AddRec = cast<SCEVAddRecExpr>(getAddRecExpr(NewOps, AddRec->getLoop())); + const SCEV *FoldedRec = + getAddRecExpr(NewOps, AddRec->getLoop(), + AddRec->getNoWrapFlags(SCEV::FlagNW)); + AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec); + // The addrec may be folded to a nonrecurrence, for example, if the + // induction variable is multiplied by zero after constant folding. Go + // ahead and return the folded value. + if (!AddRec) + return FoldedRec; break; } @@ -4871,6 +4874,11 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// HowFarToZero - Return the number of times a backedge comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. +/// +/// This is only used for loops with an "x != y" exit test. The exit condition is +/// now expressed as a single expression, V = x-y. So the exit test is +/// effectively V != 0. We know and take advantage of the fact that this +/// expression is only used in a comparison-by-zero context. ScalarEvolution::BackedgeTakenInfo ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // If the value is a constant @@ -4903,7 +4911,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { R2->getValue()))) { if (CB->getZExtValue() == false) std::swap(R1, R2); // R1 is the minimum root now. - + // We can only use this value if the chrec ends up with an exact zero // value at this index. When solving for "X*X != 5", for example, we // should not accept a root of 2. @@ -4934,26 +4942,43 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); - // If the AddRec is NUW, then (in an unsigned sense) it cannot be counting up - // to wrap to 0, it must be counting down to equal 0. Also, while counting - // down, it cannot "miss" 0 (which would cause it to wrap), regardless of what - // the stride is. As such, NUW addrec's will always become zero in - // "start / -stride" steps, and we know that the division is exact. - if (AddRec->hasNoUnsignedWrap()) - // FIXME: We really want an "isexact" bit for udiv.
- return getUDivExpr(Start, getNegativeSCEV(Step)); - // For now we handle only constant steps. + // + // TODO: Handle a nonconstant Step given AddRec<NUW>. If the + // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap + // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. + // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); if (StepC == 0) return getCouldNotCompute(); - // First, handle unitary steps. - if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: - return getNegativeSCEV(Start); // N = -Start (as unsigned) - - if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: - return Start; // N = Start (as unsigned) + // For positive steps (counting up until unsigned overflow): + // N = -Start/Step (as unsigned) + // For negative steps (counting down to zero): + // N = Start/-Step + // First compute the unsigned distance from zero in the direction of Step. + bool CountDown = StepC->getValue()->getValue().isNegative(); + const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); + + // Handle unitary steps, which cannot wrap around. + // 1*N = -Start; -1*N = Start (mod 2^BW), so: + // N = Distance (as unsigned) + if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) + return Distance; + + // If the recurrence is known not to wrap around, unsigned divide computes the + // back edge count. We know that the value will either become zero (and thus + // the loop terminates), that the loop will terminate through some other exit + // condition first, or that the loop has undefined behavior. This means + // we can't "miss" the exit value, even with nonunit stride. + // + // FIXME: Prove that loops always exhibit *acceptable* undefined + // behavior. Loops must exhibit defined behavior until a wrapped value is + // actually used. So the trip count computed by udiv could be smaller than the + // number of well-defined iterations. + if (AddRec->getNoWrapFlags(SCEV::FlagNW)) + // FIXME: We really want an "isexact" bit for udiv. + return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); // Then, try to solve the above equation provided that Start is constant.
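The rewritten tail of HowFarToZero above reduces "when does {Start,+,Step} hit zero" to an unsigned division: the distance to zero is Start when counting down and -Start (mod 2^BW) when counting up, a unit step returns that distance directly, and a recurrence known not to self-wrap divides the distance by the step's magnitude. A standalone 8-bit model of that arithmetic (assumed semantics), cross-checked by simulating the wraparound:

    #include <cassert>
    #include <cstdint>

    uint8_t tripCountToZero(uint8_t Start, int8_t Step) {
      bool CountDown = Step < 0;
      uint8_t Distance = CountDown ? Start : (uint8_t)-Start;
      uint8_t Mag = CountDown ? (uint8_t)-Step : (uint8_t)Step;
      return Distance / Mag;        // exact when the addrec is NW
    }

    int main() {
      // {250,+,1}: 250, 251, ..., 255, 0 -> reaches zero after 6 steps.
      assert(tripCountToZero(250, 1) == 6);
      // {12,+,-4}: 12, 8, 4, 0 -> 3 steps.
      assert(tripCountToZero(12, -4) == 3);
      // Double-check the second case by simulating modular addition.
      uint8_t X = 12, N = 0;
      while (X != 0) { X = (uint8_t)(X + (uint8_t)-4); ++N; }
      assert(N == 3);
    }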
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) @@ -5220,12 +5245,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, case ICmpInst::ICMP_SLE: if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, - /*HasNUW=*/false, /*HasNSW=*/true); + SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; Changed = true; } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, - /*HasNUW=*/false, /*HasNSW=*/true); + SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; Changed = true; } @@ -5233,12 +5258,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, case ICmpInst::ICMP_SGE: if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, - /*HasNUW=*/false, /*HasNSW=*/true); + SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; Changed = true; } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, - /*HasNUW=*/false, /*HasNSW=*/true); + SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; Changed = true; } @@ -5246,12 +5271,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, case ICmpInst::ICMP_ULE: if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, - /*HasNUW=*/true, /*HasNSW=*/false); + SCEV::FlagNUW); Pred = ICmpInst::ICMP_ULT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, - /*HasNUW=*/true, /*HasNSW=*/false); + SCEV::FlagNUW); Pred = ICmpInst::ICMP_ULT; Changed = true; } @@ -5259,12 +5284,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, case ICmpInst::ICMP_UGE: if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, - /*HasNUW=*/true, /*HasNSW=*/false); + SCEV::FlagNUW); Pred = ICmpInst::ICMP_UGT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, - /*HasNUW=*/true, /*HasNSW=*/false); + SCEV::FlagNUW); Pred = ICmpInst::ICMP_UGT; Changed = true; } @@ -5646,6 +5671,13 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, "This code doesn't handle negative strides yet!"); const Type *Ty = Start->getType(); + + // When Start == End, we have an exact BECount == 0. Short-circuit this case + // here because SCEV may not be able to determine that the unsigned division + // after rounding is zero. + if (Start == End) + return getConstant(Ty, 0); + const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); const SCEV *Diff = getMinusSCEV(End, Start); const SCEV *RoundUp = getAddExpr(Step, NegOne); @@ -5683,8 +5715,8 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, return getCouldNotCompute(); // Check to see if we have a flag which makes analysis easy. - bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() : - AddRec->hasNoUnsignedWrap(); + bool NoWrap = isSigned ? 
AddRec->getNoWrapFlags(SCEV::FlagNSW) : + AddRec->getNoWrapFlags(SCEV::FlagNUW); if (AddRec->isAffine()) { unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); @@ -5768,7 +5800,16 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // The maximum backedge count is similar, except using the minimum start // value and the maximum end value. - const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap); + // If we already have an exact constant BECount, use it instead. + const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount + : getBECount(MinStart, MaxEnd, Step, NoWrap); + + // If the stride is nonconstant, and NoWrap == true, then + // getBECount(MinStart, MaxEnd) may not compute. This would result in an + // exact BECount and invalid MaxBECount, which should be avoided to catch + // more optimization opportunities. + if (isa<SCEVCouldNotCompute>(MaxBECount)) + MaxBECount = BECount; return BackedgeTakenInfo(BECount, MaxBECount); } @@ -5791,7 +5832,8 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, if (!SC->getValue()->isZero()) { SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); Operands[0] = SE.getConstant(SC->getType(), 0); - const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop()); + const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), + getNoWrapFlags(FlagNW)); if (const SCEVAddRecExpr *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted)) return ShiftedAddRec->getNumIterationsInRange( @@ -5852,7 +5894,9 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // Range.getUpper() is crossed. SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end()); NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); - const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), + // getNoWrapFlags(FlagNW) + FlagAnyWrap); // Next, solve the constructed addrec std::pair<const SCEV *,const SCEV *> Roots = diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index b7c110f..8e5a400 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -262,7 +262,8 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *Start = A->getStart(); if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) return false; - S = SE.getAddRecExpr(Start, Step, A->getLoop()); + // FIXME: can use A->getNoWrapFlags(FlagNW) + S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap); return true; } @@ -314,7 +315,9 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, const SCEV *Zero = SE.getConstant(Ty, 0); AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), - A->getLoop())); + A->getLoop(), + // FIXME: A->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) { Ops[i] = Zero; Ops.append(Add->op_begin(), Add->op_end()); @@ -823,7 +826,9 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, Rest = SE.getAddExpr(Rest, SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), - A->getLoop())); + A->getLoop(), + // FIXME: A->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { Base = A->getOperand(A->getNumOperands()-1); @@ -858,7 +863,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // loop already visited by LSR for example, but it wouldn't have // to be. 
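For context on the BECount expressions flowing through the last few hunks: getBECount (touched a little above) evaluates ceil((End - Start) / Step) as (Diff + Step - 1) /u Step, and this import short-circuits Start == End to an exact zero because the symbolic rounded division cannot always be folded that far. A standalone model of the computation:

    #include <cassert>
    #include <cstdint>

    uint64_t getBECount(uint64_t Start, uint64_t End, uint64_t Step) {
      if (Start == End)                 // new fast path: exactly zero
        return 0;
      uint64_t Diff = End - Start;
      uint64_t RoundUp = Step - 1;
      return (Diff + RoundUp) / Step;   // ceil(Diff / Step), unsigned
    }

    int main() {
      assert(getBECount(0, 10, 3) == 4);  // 0, 3, 6, 9 -> four values
      assert(getBECount(5, 5, 7) == 0);
      assert(getBECount(2, 10, 4) == 2);  // 2, 6 -> two values
    }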
do { - if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV)) { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || + (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) { IncV = 0; break; } @@ -926,14 +932,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); // Create the PHI. - Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin()); - PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv"); + BasicBlock *Header = L->getHeader(); + Builder.SetInsertPoint(Header, Header->begin()); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv"); rememberInstruction(PN); // Create the step instructions and populate the PHI. - BasicBlock *Header = L->getHeader(); - for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); - HPI != HPE; ++HPI) { + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *Pred = *HPI; // Add a start value. @@ -1004,10 +1010,11 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (!SE.properlyDominates(Start, L->getHeader())) { PostLoopOffset = Start; Start = SE.getConstant(Normalized->getType(), 0); - Normalized = - cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, - Normalized->getStepRecurrence(SE), - Normalized->getLoop())); + Normalized = cast<SCEVAddRecExpr>( + SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), + Normalized->getLoop(), + // FIXME: Normalized->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); } // Strip off any non-loop-dominating component from the addrec step. @@ -1018,7 +1025,10 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Step = SE.getConstant(Normalized->getType(), 1); Normalized = cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step, - Normalized->getLoop())); + Normalized->getLoop(), + // FIXME: Normalized + // ->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); } // Expand the core addrec. If we need post-loop scaling, force it to @@ -1081,7 +1091,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); - Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), + // FIXME: S->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = @@ -1098,7 +1110,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!S->getStart()->isZero()) { SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); NewOps[0] = SE.getConstant(Ty, 0); - const SCEV *Rest = SE.getAddRecExpr(NewOps, L); + // FIXME: can use S->getNoWrapFlags() + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. @@ -1128,12 +1141,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Create and insert the PHI node for the induction variable in the // specified loop. 
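The PHI-building changes here reflect an API migration that recurs throughout this import: PHINode creation now takes the expected number of incoming values up front, replacing the removed reserveOperandSpace, so callers count predecessors before creating the node and the operand list is allocated once rather than grown per edge. An assumed usage sketch, modeled on the expander hunks above:

    #include <iterator>
    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    #include "llvm/Type.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    // Count the header's predecessors first so the PHI reserves exactly the
    // operand capacity it will need.
    static PHINode *makeIndVarPhi(BasicBlock *Header, const Type *Ty) {
      pred_iterator PB = pred_begin(Header), PE = pred_end(Header);
      PHINode *PN = PHINode::Create(Ty, std::distance(PB, PE), "indvar",
                                    Header->begin());
      // One PN->addIncoming(V, *PI) per predecessor then fills it in.
      return PN;
    }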
BasicBlock *Header = L->getHeader(); - CanonicalIV = PHINode::Create(Ty, "indvar", Header->begin()); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", + Header->begin()); rememberInstruction(CanonicalIV); Constant *One = ConstantInt::get(Ty, 1); - for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); - HPI != HPE; ++HPI) { + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *HP = *HPI; if (L->contains(HP)) { // Insert a unit add instruction right before the terminator @@ -1333,7 +1347,7 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedValues.insert(I); // If we just claimed an existing instruction and that instruction had - // been the insert point, adjust the insert point forward so that + // been the insert point, adjust the insert point forward so that // subsequently inserted code will be dominated. if (Builder.GetInsertPoint() == I) { BasicBlock::iterator It = cast<Instruction>(I); @@ -1361,8 +1375,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); // Build a SCEV for {0,+,1}<L>. + // Conservatively use FlagAnyWrap for now. const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), - SE.getConstant(Ty, 1), L); + SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); // Emit code for it. BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index ac36cef..60e630a 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -97,7 +97,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT); Operands.push_back(N); } - const SCEV *Result = SE.getAddRecExpr(Operands, L); + // Conservatively use AnyWrap until/unless we need FlagNW. + const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); switch (Kind) { default: llvm_unreachable("Unexpected transform name!"); case NormalizeAutodetect: diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 40e18ab..0faf1398 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -31,7 +31,7 @@ // // The second field identifies the type's parent node in the tree, or // is null or omitted for a root node. A type is considered to alias -// all of its decendents and all of its ancestors in the tree. Also, +// all of its descendants and all of its ancestors in the tree. Also, // a type is considered to alias all types in other trees, so that // bitcode produced from multiple front-ends is handled conservatively. // @@ -59,6 +59,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Constants.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Metadata.h" diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 1060bc5..8f18dd2 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -429,6 +429,29 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero |= LHSKnownZero & Mask; KnownOne |= LHSKnownOne & Mask; } + + // Are we still trying to solve for the sign bit? 
+ if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){ + OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I); + if (OBO->hasNoSignedWrap()) { + if (I->getOpcode() == Instruction::Add) { + // Adding two positive numbers can't wrap into negative + if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // and adding two negative numbers can't wrap into positive. + else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } else { + // Subtracting a negative number from a positive one can't wrap + if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // neither can subtracting a positive number from a negative one. + else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } + } + } + return; } case Instruction::SRem: @@ -460,6 +483,19 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. + if (Mask.isNegative() && KnownZero.isNonNegative()) { + APInt Mask2 = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + Depth+1); + // If it's known zero, our sign bit is also zero. + if (LHSKnownZero.isNegative()) + KnownZero |= LHSKnownZero; + } + break; case Instruction::URem: { if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { @@ -597,6 +633,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + // Skip if every incoming value references the PHI node itself. + if (P->hasConstantValue() == P) + break; + KnownZero = APInt::getAllOnesValue(BitWidth); KnownOne = APInt::getAllOnesValue(BitWidth); for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { @@ -684,6 +724,16 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { return isPowerOfTwo(SI->getTrueValue(), TD, Depth) && isPowerOfTwo(SI->getFalseValue(), TD, Depth); + // An exact divide or right shift can only shift off zero bits, so the result + // is a power of two only if the first operand is a power of two and not + // copying a sign bit (sdiv int_min, 2). + if (match(V, m_LShr(m_Value(), m_Value())) || + match(V, m_UDiv(m_Value(), m_Value()))) { + PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V); + if (PEO->isExact()) + return isPowerOfTwo(PEO->getOperand(0), TD, Depth); + } + return false; } @@ -720,6 +770,11 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined // if the lowest bit is shifted off the end. if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { + // shl nuw can't remove any non-zero bits.
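The new ComputeMaskedBits case above deduces the sign bit of an nsw add or sub from the operands' known signs: no signed wrap means the machine result equals the mathematical result, so non-negative plus non-negative stays non-negative, negative plus negative stays negative, and the analogous rules hold for subtraction. An exhaustive 8-bit check of those claims (standalone arithmetic, not the APInt code):

    #include <cassert>

    int main() {
      for (int x = -128; x <= 127; ++x)
        for (int y = -128; y <= 127; ++y) {
          int s = x + y;
          // The nsw precondition: the sum stays representable.
          if (s >= -128 && s <= 127) {
            if (x >= 0 && y >= 0) assert(s >= 0);
            if (x < 0 && y < 0) assert(s < 0);
          }
          int d = x - y;
          if (d >= -128 && d <= 127) {
            if (x >= 0 && y < 0) assert(d >= 0);  // pos minus neg
            if (x < 0 && y >= 0) assert(d < 0);   // neg minus pos
          }
        }
    }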
+ BinaryOperator *BO = cast<BinaryOperator>(V); + if (BO->hasNoUnsignedWrap()) + return isKnownNonZero(X, TD, Depth); + APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth); @@ -729,11 +784,22 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // shr X, Y != 0 if X is negative. Note that the value of the shift is not // defined if the sign bit is shifted off the end. else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { + // shr exact can only shift out zero bits. + BinaryOperator *BO = cast<BinaryOperator>(V); + if (BO->isExact()) + return isKnownNonZero(X, TD, Depth); + bool XKnownNonNegative, XKnownNegative; ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); if (XKnownNegative) return true; } + // div exact can only produce a zero if the dividend is zero. + else if (match(V, m_IDiv(m_Value(X), m_Value()))) { + BinaryOperator *BO = cast<BinaryOperator>(V); + if (BO->isExact()) + return isKnownNonZero(X, TD, Depth); + } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { bool XKnownNonNegative, XKnownNegative; @@ -1262,7 +1328,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, break; } } - // If we succesfully found a value for each of our subaggregates + // If we successfully found a value for each of our subaggregates if (To) return To; } @@ -1691,7 +1757,7 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) { } else { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) - // TODO: Aquire a DominatorTree and use it. + // TODO: Acquire a DominatorTree and use it. if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { V = Simplified; continue; diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index c5ad5fc..8fcc7aa 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -18,7 +18,6 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" #include "llvm/Support/system_error.h" #include <fstream> @@ -28,7 +27,7 @@ using namespace llvm; // Write an integer using variable bit rate encoding. This saves a few bytes // per entry in the symbol table. -static inline void writeInteger(unsigned num, raw_ostream& ARFile) { +static inline void writeInteger(unsigned num, std::ofstream& ARFile) { while (1) { if (num < 0x80) { // done? ARFile << (unsigned char)num; @@ -202,14 +201,14 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where, bool Archive::writeMember( const ArchiveMember& member, - raw_ostream& ARFile, + std::ofstream& ARFile, bool CreateSymbolTable, bool TruncateNames, bool ShouldCompress, std::string* ErrMsg ) { - unsigned filepos = ARFile.tell(); + unsigned filepos = ARFile.tellp(); filepos -= 8; // Get the data and its size either from the @@ -282,7 +281,7 @@ Archive::writeMember( ARFile.write(data,fSize); // Make sure the member is an even length - if ((ARFile.tell() & 1) == 1) + if ((ARFile.tellp() & 1) == 1) ARFile << ARFILE_PAD; // Close the mapped file if it was opened @@ -292,7 +291,7 @@ Archive::writeMember( // Write out the LLVM symbol table as an archive member to the file. 
void -Archive::writeSymbolTable(raw_ostream& ARFile) { +Archive::writeSymbolTable(std::ofstream& ARFile) { // Construct the symbol table's header ArchiveMemberHeader Hdr; @@ -316,7 +315,7 @@ Archive::writeSymbolTable(raw_ostream& ARFile) { #ifndef NDEBUG // Save the starting position of the symbol tables data content. - unsigned startpos = ARFile.tell(); + unsigned startpos = ARFile.tellp(); #endif // Write out the symbols sequentially @@ -333,7 +332,7 @@ Archive::writeSymbolTable(raw_ostream& ARFile) { #ifndef NDEBUG // Now that we're done with the symbol table, get the ending file position - unsigned endpos = ARFile.tell(); + unsigned endpos = ARFile.tellp(); #endif // Make sure that the amount we wrote is what we pre-computed. This is @@ -362,20 +361,25 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, } // Create a temporary file to store the archive in - SmallString<128> TempArchivePath; - int ArchFD; - if (error_code ec = - sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()), - ArchFD, TempArchivePath)) { - if (ErrMsg) *ErrMsg = ec.message(); + sys::Path TmpArchive = archPath; + if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) return true; - } // Make sure the temporary gets removed if we crash - sys::RemoveFileOnSignal(sys::Path(TempArchivePath.str())); + sys::RemoveFileOnSignal(TmpArchive); // Create archive file for output. - raw_fd_ostream ArchiveFile(ArchFD, true); + std::ios::openmode io_mode = std::ios::out | std::ios::trunc | + std::ios::binary; + std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); + + // Check for errors opening or creating archive file. + if (!ArchiveFile.is_open() || ArchiveFile.bad()) { + TmpArchive.eraseFromDisk(); + if (ErrMsg) + *ErrMsg = "Error opening archive file: " + archPath.str(); + return true; + } // If we're creating a symbol table, reset it now if (CreateSymbolTable) { @@ -391,9 +395,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { if (writeMember(*I, ArchiveFile, CreateSymbolTable, TruncateNames, Compress, ErrMsg)) { + TmpArchive.eraseFromDisk(); ArchiveFile.close(); - bool existed; - sys::fs::remove(TempArchivePath.str(), existed); return true; } } @@ -408,12 +411,12 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // ensure compatibility with other archivers we need to put the symbol // table first in the file. Unfortunately, this means mapping the file // we just wrote back in and copying it to the destination file. - SmallString<128> TempArchiveWithSymbolTablePath; + sys::Path FinalFilePath = archPath; // Map in the archive we just wrote. 
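Archive::writeToDisk above reverts to the classic safe-write dance: stage the new archive in a temporary file, optionally build a second temporary with the symbol table placed first, then rename over the destination so a crash never leaves a half-written archive behind. A generic sketch of the pattern in standard C++ (not the sys::Path API used by the patch):

    #include <cstdio>
    #include <fstream>
    #include <string>

    // Stage into a temporary in the same directory, then rename over the
    // target; rename is atomic on POSIX when both paths share a filesystem.
    static bool safeWrite(const std::string &path, const std::string &data) {
      std::string tmp = path + ".tmp";
      {
        std::ofstream out(tmp.c_str(), std::ios::out | std::ios::trunc |
                                       std::ios::binary);
        if (!out.is_open() || out.bad())
          return false;
        out << data;
        if (!out.good()) {
          out.close();
          std::remove(tmp.c_str());
          return false;
        }
      }
      if (std::rename(tmp.c_str(), path.c_str()) != 0) {
        std::remove(tmp.c_str());
        return false;
      }
      return true;
    }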
{ OwningPtr<MemoryBuffer> arch; - if (error_code ec = MemoryBuffer::getFile(TempArchivePath.c_str(), arch)) { + if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) { if (ErrMsg) *ErrMsg = ec.message(); return true; @@ -422,15 +425,17 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // Open another temporary file in order to avoid invalidating the // mmapped data - if (error_code ec = - sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()), - ArchFD, TempArchiveWithSymbolTablePath)) { - if (ErrMsg) *ErrMsg = ec.message(); + if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg)) return true; - } - sys::RemoveFileOnSignal(sys::Path(TempArchiveWithSymbolTablePath.str())); + sys::RemoveFileOnSignal(FinalFilePath); - raw_fd_ostream FinalFile(ArchFD, true); + std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); + if (!FinalFile.is_open() || FinalFile.bad()) { + TmpArchive.eraseFromDisk(); + if (ErrMsg) + *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); + return true; + } // Write the file magic number FinalFile << ARFILE_MAGIC; @@ -443,8 +448,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, if (foreignST) { if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { FinalFile.close(); - bool existed; - sys::fs::remove(TempArchiveWithSymbolTablePath.str(), existed); + TmpArchive.eraseFromDisk(); return true; } } @@ -462,11 +466,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, } // free arch. // Move the final file over top of TmpArchive - if (error_code ec = sys::fs::rename(TempArchiveWithSymbolTablePath.str(), - TempArchivePath.str())) { - if (ErrMsg) *ErrMsg = ec.message(); + if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg)) return true; - } } // Before we replace the actual archive, we need to forget all the @@ -474,11 +475,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // this because we cannot replace an open file on Windows. cleanUpMemory(); - if (error_code ec = sys::fs::rename(TempArchivePath.str(), - archPath.str())) { - if (ErrMsg) *ErrMsg = ec.message(); + if (TmpArchive.renamePathOnDisk(archPath, ErrMsg)) return true; - } // Set correct read and write permissions after temporary file is moved // to final destination path. diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index cdfacbe..a2c53be 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -249,11 +249,7 @@ bool LLParser::ParseModuleAsm() { if (ParseToken(lltok::kw_asm, "expected 'module asm'") || ParseStringConstant(AsmStr)) return true; - const std::string &AsmSoFar = M->getModuleInlineAsm(); - if (AsmSoFar.empty()) - M->setModuleInlineAsm(AsmStr); - else - M->setModuleInlineAsm(AsmSoFar+"\n"+AsmStr); + M->appendModuleInlineAsm(AsmStr); return false; } @@ -518,7 +514,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) { if (Result) return false; // Otherwise, create MDNode forward reference. 
- MDNode *FwdNode = MDNode::getTemporary(Context, 0, 0); + MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>()); ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc()); if (NumberedMetadata.size() <= MID) @@ -576,7 +572,7 @@ bool LLParser::ParseStandaloneMetadata() { ParseToken(lltok::rbrace, "expected end of metadata node")) return true; - MDNode *Init = MDNode::get(Context, Elts.data(), Elts.size()); + MDNode *Init = MDNode::get(Context, Elts); // See if this was forward referenced, if so, handle it. std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator @@ -2502,7 +2498,7 @@ bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rbrace, "expected end of metadata node")) return true; - ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size()); + ID.MDNodeVal = MDNode::get(Context, Elts); ID.Kind = ValID::t_MDNode; return false; } @@ -3638,8 +3634,7 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { if (!Ty->isFirstClassType()) return Error(TypeLoc, "phi node must have first class type"); - PHINode *PN = PHINode::Create(Ty); - PN->reserveOperandSpace(PHIVals.size()); + PHINode *PN = PHINode::Create(Ty, PHIVals.size()); for (unsigned i = 0, e = PHIVals.size(); i != e; ++i) PN->addIncoming(PHIVals[i].first, PHIVals[i].second); Inst = PN; diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index dbf8da0..19f57cf 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -301,8 +301,7 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { NewC = ConstantVector::get(NewOps); } else { assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr."); - NewC = cast<ConstantExpr>(UserC)->getWithOperands(&NewOps[0], - NewOps.size()); + NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps); } UserC->replaceAllUsesWith(NewC); @@ -350,7 +349,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { } // Create and return a placeholder, which will later be RAUW'd. - Value *V = MDNode::getTemporary(Context, 0, 0); + Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>()); MDValuePtrs[Idx] = V; return V; } @@ -844,9 +843,7 @@ bool BitcodeReader::ParseMetadata() { else Elts.push_back(NULL); } - Value *V = MDNode::getWhenValsUnresolved(Context, - Elts.data(), Elts.size(), - IsFunctionLocal); + Value *V = MDNode::getWhenValsUnresolved(Context, Elts, IsFunctionLocal); IsFunctionLocal = false; MDValueList.AssignValue(V, NextMDValueNo++); break; @@ -2288,9 +2285,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { const Type *Ty = getTypeByID(Record[0]); if (!Ty) return Error("Invalid PHI record"); - PHINode *PN = PHINode::Create(Ty); + PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); InstructionList.push_back(PN); - PN->reserveOperandSpace((Record.size()-1)/2); for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { Value *V = getFnValueByID(Record[1+i], Ty); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index f8ef8c6..e34137f 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -197,7 +197,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { // Loop over all of the types, emitting each in turn. 
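The metadata and PHI hunks above show the other API migration running through this import: explicit (pointer, length) argument pairs become ArrayRef, so MDNode::get(Context, Elts.data(), Elts.size()) becomes MDNode::get(Context, Elts), with an explicit empty ArrayRef<Value*>() replacing the old (0, 0). An assumed usage sketch modeled on the parser and reader code above:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    #include "llvm/Value.h"
    using namespace llvm;

    static MDNode *makeNode(LLVMContext &Ctx,
                            const SmallVectorImpl<Value*> &Elts) {
      if (Elts.empty())
        // The empty node, previously spelled MDNode::get(Ctx, 0, 0).
        return MDNode::get(Ctx, ArrayRef<Value*>());
      return MDNode::get(Ctx, Elts);  // SmallVector converts to ArrayRef
    }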
for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { - const Type *T = TypeList[i].first; + const Type *T = TypeList[i]; int AbbrevToUse = 0; unsigned Code = 0; diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 2f02262..21f004a 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "ValueEnumerator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -21,22 +23,10 @@ #include <algorithm> using namespace llvm; -static bool isSingleValueType(const std::pair<const llvm::Type*, - unsigned int> &P) { - return P.first->isSingleValueType(); -} - static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) { return V.first->getType()->isIntegerTy(); } -static bool CompareByFrequency(const std::pair<const llvm::Type*, - unsigned int> &P1, - const std::pair<const llvm::Type*, - unsigned int> &P2) { - return P1.second > P2.second; -} - /// ValueEnumerator - Enumerate module-level information. ValueEnumerator::ValueEnumerator(const Module *M) { // Enumerate the global variables. @@ -120,18 +110,72 @@ ValueEnumerator::ValueEnumerator(const Module *M) { // Optimize constant ordering. OptimizeConstants(FirstConstant, Values.size()); - // Sort the type table by frequency so that most commonly used types are early - // in the table (have low bit-width). - std::stable_sort(Types.begin(), Types.end(), CompareByFrequency); - - // Partition the Type ID's so that the single-value types occur before the - // aggregate types. This allows the aggregate types to be dropped from the - // type table after parsing the global variable initializers. - std::partition(Types.begin(), Types.end(), isSingleValueType); + OptimizeTypes(); // Now that we rearranged the type table, rebuild TypeMap. for (unsigned i = 0, e = Types.size(); i != e; ++i) - TypeMap[Types[i].first] = i+1; + TypeMap[Types[i]] = i+1; +} + +struct TypeAndDeps { + const Type *Ty; + unsigned NumDeps; +}; + +static int CompareByDeps(const void *a, const void *b) { + const TypeAndDeps &ta = *(const TypeAndDeps*) a; + const TypeAndDeps &tb = *(const TypeAndDeps*) b; + return ta.NumDeps - tb.NumDeps; +} + +static void VisitType(const Type *Ty, SmallPtrSet<const Type*, 16> &Visited, + std::vector<const Type*> &Out) { + if (Visited.count(Ty)) + return; + + Visited.insert(Ty); + + for (Type::subtype_iterator I2 = Ty->subtype_begin(), + E2 = Ty->subtype_end(); I2 != E2; ++I2) { + const Type *InnerType = I2->get(); + VisitType(InnerType, Visited, Out); + } + + Out.push_back(Ty); +} + +void ValueEnumerator::OptimizeTypes(void) { + // If the types form a DAG, this will compute a topological sort and + // no forward references will be needed when reading them in. + // If there are cycles, this is a simple but reasonable heuristic for + // the minimum feedback arc set problem. 
+ const unsigned NumTypes = Types.size(); + std::vector<TypeAndDeps> TypeDeps; + TypeDeps.resize(NumTypes); + + for (unsigned I = 0; I < NumTypes; ++I) { + const Type *Ty = Types[I]; + TypeDeps[I].Ty = Ty; + TypeDeps[I].NumDeps = 0; + } + + for (unsigned I = 0; I < NumTypes; ++I) { + const Type *Ty = TypeDeps[I].Ty; + for (Type::subtype_iterator I2 = Ty->subtype_begin(), + E2 = Ty->subtype_end(); I2 != E2; ++I2) { + const Type *InnerType = I2->get(); + unsigned InnerIndex = TypeMap.lookup(InnerType) - 1; + TypeDeps[InnerIndex].NumDeps++; + } + } + array_pod_sort(TypeDeps.begin(), TypeDeps.end(), CompareByDeps); + + SmallPtrSet<const Type*, 16> Visited; + Types.clear(); + Types.reserve(NumTypes); + for (unsigned I = 0; I < NumTypes; ++I) { + VisitType(TypeDeps[I].Ty, Visited, Types); + } } unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { @@ -319,7 +363,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) { // Initializers for globals are handled explicitly elsewhere. } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) { // Do not enumerate the initializers for an array of simple characters. - // The initializers just polute the value table, and we emit the strings + // The initializers just pollute the value table, and we emit the strings // specially. } else if (C->getNumOperands()) { // If a constant has operands, enumerate them. This makes sure that if a @@ -352,14 +396,12 @@ void ValueEnumerator::EnumerateValue(const Value *V) { void ValueEnumerator::EnumerateType(const Type *Ty) { unsigned &TypeID = TypeMap[Ty]; - if (TypeID) { - // If we've already seen this type, just increase its occurrence count. - Types[TypeID-1].second++; + // We've already seen this type. + if (TypeID) return; - } // First time we saw this type, add it. - Types.push_back(std::make_pair(Ty, 1U)); + Types.push_back(Ty); TypeID = Types.size(); // Enumerate subtypes. @@ -381,7 +423,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) { // This constant may have operands, make sure to enumerate the types in // them. for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { - const User *Op = C->getOperand(i); + const Value *Op = C->getOperand(i); // Don't enumerate basic blocks here, this happens as operands to // blockaddress. diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index cd1d237..1e42a26 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -36,8 +36,7 @@ class MDSymbolTable; class ValueEnumerator { public: - // For each type, we remember its Type* and occurrence frequency. - typedef std::vector<std::pair<const Type*, unsigned> > TypeList; + typedef std::vector<const Type*> TypeList; // For each value, we remember its Value* and occurrence frequency. 
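The VisitType/OptimizeTypes pair above implements the heuristic its comment describes: count how often each type is referenced as a subtype, visit the least-referenced types first, and let the post-order DFS place dependencies before their users. A self-contained sketch of the same idea on a toy type graph (ToyType and the helper names are hypothetical; only the algorithm matches):

    #include <algorithm>
    #include <map>
    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    struct ToyType {
      std::string Name;
      std::vector<ToyType*> Subtypes;   // analogous to subtype_begin/end
    };

    static void Visit(ToyType *T, std::set<ToyType*> &Seen,
                      std::vector<ToyType*> &Out) {
      if (!Seen.insert(T).second)
        return;                           // already emitted (or on a cycle)
      for (size_t i = 0; i != T->Subtypes.size(); ++i)
        Visit(T->Subtypes[i], Seen, Out); // dependencies first
      Out.push_back(T);                   // post-order: topological on a DAG
    }

    static std::vector<ToyType*> OrderTypes(const std::vector<ToyType*> &Types) {
      // Count how often each type appears as some other type's subtype.
      std::map<ToyType*, unsigned> NumDeps;
      for (size_t i = 0; i != Types.size(); ++i)
        for (size_t j = 0; j != Types[i]->Subtypes.size(); ++j)
          ++NumDeps[Types[i]->Subtypes[j]];

      // Seed the DFS with the least-referenced types; when the graph has
      // cycles this tends to break them at rarely-used edges, which is the
      // "feedback arc" heuristic named in the comment above.
      std::vector<std::pair<unsigned, ToyType*> > Order;
      for (size_t i = 0; i != Types.size(); ++i)
        Order.push_back(std::make_pair(NumDeps[Types[i]], Types[i]));
      std::stable_sort(Order.begin(), Order.end());

      std::set<ToyType*> Seen;
      std::vector<ToyType*> Out;
      for (size_t i = 0; i != Order.size(); ++i)
        Visit(Order[i].second, Seen, Out);
      return Out;
    }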
typedef std::vector<std::pair<const Value*, unsigned> > ValueList; @@ -136,6 +135,7 @@ public: private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); + void OptimizeTypes(); void EnumerateMDNodeOperands(const MDNode *N); void EnumerateMetadata(const Value *MD); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index e2838c3..80118f0 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(Transforms) add_subdirectory(Linker) add_subdirectory(Analysis) add_subdirectory(MC) +add_subdirectory(CompilerDriver) add_subdirectory(Object) add_subdirectory(ExecutionEngine) add_subdirectory(Target) diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index b520d8f..5c809f7 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -357,7 +357,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, RegRefs = State->GetRegRefs(); // Handle dead defs by simulating a last-use of the register just - // after the def. A dead def can occur because the def is truely + // after the def. A dead def can occur because the def is truly // dead, or because only a subregister is live at the def. If we // don't do this the dead def will be incorrectly merged into the // previous def. diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 36638c3..125e641 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -211,7 +211,6 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); - const Function *F = ExitBB->getParent(); // The block must end in a return statement or unreachable. // @@ -250,6 +249,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. + const Function *F = ExitBB->getParent(); unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp new file mode 100644 index 0000000..0db28a6 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -0,0 +1,87 @@ +//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +ARMException::ARMException(AsmPrinter *A) + : DwarfException(A), + shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) + {} + +ARMException::~ARMException() {} + +void ARMException::EndModule() { +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void ARMException::BeginFunction(const MachineFunction *MF) { + Asm->OutStreamer.EmitFnStart(); + if (!Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void ARMException::EndFunction() { + if (Asm->MF->getFunction()->doesNotThrow() && !UnwindTablesMandatory) + Asm->OutStreamer.EmitCantUnwind(); + else { + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Emit references to personality. + if (const Function * Personality = + MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + MCSymbol *PerSym = Asm->Mang->getSymbol(Personality); + Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); + Asm->OutStreamer.EmitPersonality(PerSym); + } + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + Asm->OutStreamer.EmitHandlerData(); + + // Emit actual exception table + EmitExceptionTable(); + } + + Asm->OutStreamer.EmitFnEnd(); +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 9cb882e..8116f8d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -33,10 +33,12 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/SmallString.h" @@ -70,17 +72,17 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, unsigned NumBits = 0; if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) NumBits = TD.getPreferredAlignmentLog(GVar); - + // If InBits is specified, round it to it. 
if (InBits > NumBits) NumBits = InBits; - + // If the GV has a specified alignment, take it into account. if (GV->getAlignment() == 0) return NumBits; - + unsigned GVAlign = Log2_32(GV->getAlignment()); - + // If the GVAlign is larger than NumBits, or if we are required to obey // NumBits because the GV has an assigned section, obey it. if (GVAlign > NumBits || GV->hasSection()) @@ -104,16 +106,16 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) AsmPrinter::~AsmPrinter() { assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); - + if (GCMetadataPrinters != 0) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - + for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I) delete I->second; delete &GCMap; GCMetadataPrinters = 0; } - + delete &OutStreamer; } @@ -156,9 +158,9 @@ bool AsmPrinter::doInitialization(Module &M) { // Initialize TargetLoweringObjectFile. const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - + Mang = new Mangler(OutContext, *TM.getTargetData()); - + // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -196,6 +198,9 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::DwarfCFI: DE = new DwarfCFIException(this); break; + case ExceptionHandling::ARM: + DE = new ARMException(this); + break; } return false; @@ -250,56 +255,52 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { - if (!GV->hasInitializer()) // External globals require no code. - return; - - // Check to see if this is a special global used by LLVM, if so, emit it. - if (EmitSpecialLLVMGlobal(GV)) - return; + if (GV->hasInitializer()) { + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GV)) + return; - if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, - /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; + if (isVerbose()) { + WriteAsOperand(OutStreamer.GetCommentOS(), GV, + /*PrintType=*/false, GV->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } } - + MCSymbol *GVSym = Mang->getSymbol(GV); EmitVisibility(GVSym, GV->getVisibility()); + if (!GV->hasInitializer()) // External globals require no extra code. + return; + if (MAI->hasDotTypeDotSizeDirective()) OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); - + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); const TargetData *TD = TM.getTargetData(); uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); - + // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). unsigned AlignLog = getGVAlignmentLog2(GV, *TD); - + // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. - - if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, - /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; - } - + // Handle common symbols. 
if (GVKind.isCommon()) { unsigned Align = 1 << AlignLog; if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) Align = 0; - + // .comm _foo, 42, 4 OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } - + // Handle local BSS symbols. if (MAI->hasMachoZeroFillDirective()) { const MCSection *TheSection = @@ -308,7 +309,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } - + if (MAI->hasLCOMMDirective()) { // .lcomm _foo, 42 OutStreamer.EmitLocalCommonSymbol(GVSym, Size); @@ -318,14 +319,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { unsigned Align = 1 << AlignLog; if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) Align = 0; - + // .local _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } - + const MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); @@ -333,14 +334,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // emission. if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. - + // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); // .zerofill __DATA, __common, _foo, 400, 5 OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } - + // Handle thread local data for mach-o which requires us to output an // additional structure of data and mangle the original symbol so that we // can reference it later. @@ -353,31 +354,31 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // specific code. if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol - MCSymbol *MangSym = + MCSymbol *MangSym = OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); - + if (GVKind.isThreadBSS()) OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); - EmitAlignment(AlignLog, GV); + EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(MangSym); - + EmitGlobalConstant(GV->getInitializer()); } - + OutStreamer.AddBlankLine(); - + // Emit the variable struct for the runtime. - const MCSection *TLVSect + const MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); - + OutStreamer.SwitchSection(TLVSect); // Emit the linkage here. EmitLinkage(GV->getLinkage(), GVSym); OutStreamer.EmitLabel(GVSym); - + // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime @@ -387,7 +388,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); - + OutStreamer.AddBlankLine(); return; } @@ -404,7 +405,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); - + OutStreamer.AddBlankLine(); } @@ -413,7 +414,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { void AsmPrinter::EmitFunctionHeader() { // Print out constants referenced by the function EmitConstantPool(); - + // Print the 'header' of function. const Function *F = MF->getFunction(); @@ -435,7 +436,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the CurrentFnSym. 
This is a virtual function to allow targets to // do their wild and crazy things as required. EmitFunctionEntryLabel(); - + // If the function had address-taken blocks that got deleted, then we have // references to the dangling symbols. Emit them at the start of the function // so that we don't get references to undefined symbols. @@ -445,17 +446,17 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.AddComment("Address taken block that was later removed"); OutStreamer.EmitLabel(DeadBlockSyms[i]); } - + // Add some workaround for linkonce linkage on Cygwin\MinGW. if (MAI->getLinkOnceDirective() != 0 && (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) { // FIXME: What is this? - MCSymbol *FakeStub = + MCSymbol *FakeStub = OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+ CurrentFnSym->getName()); OutStreamer.EmitLabel(FakeStub); } - + // Emit pre-function debug and/or EH information. if (DE) { NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -480,44 +481,16 @@ void AsmPrinter::EmitFunctionEntryLabel() { } -static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) - CommentOS << Scope.getFilename(); - else - CommentOS << "<unknown>"; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << "[ "; - EmitDebugLoc(InlinedAtDL, MF, CommentOS); - CommentOS << " ]"; - } - } -} - /// EmitComments - Pretty-print comments for instructions. static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetMachine &TM = MF->getTarget(); - - DebugLoc DL = MI.getDebugLoc(); - if (!DL.isUnknown()) { // Print source line info. - EmitDebugLoc(DL, MF, CommentOS); - CommentOS << '\n'; - } - + // Check for spills and reloads int FI; - + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); - + // We assume a single instruction only has a spill or reload, not // both. const MachineMemOperand *MMO; @@ -538,7 +511,7 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } - + // Check for spill-induced copies if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) CommentOS << " Reload Reuse\n"; @@ -612,21 +585,61 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); } - + OS << '+' << MI->getOperand(1).getImm(); // NOTE: Want this comment at start of line, don't emit with AddComment. 
AP.OutStreamer.EmitRawText(OS.str()); return true; } +bool AsmPrinter::needsCFIMoves() { + if (UnwindTablesMandatory) + return true; + + if (MMI->hasDebugInfo() && !MAI->doesDwarfRequireFrameSection()) + return true; + + if (MF->getFunction()->doesNotThrow()) + return false; + + return true; +} + +void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { + MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + + if (MAI->doesDwarfRequireFrameSection() || + MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + OutStreamer.EmitLabel(Label); + + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + return; + + if (!needsCFIMoves()) + return; + + MachineModuleInfo &MMI = MF->getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + bool FoundOne = false; + (void)FoundOne; + for (std::vector<MachineMove>::iterator I = Moves.begin(), + E = Moves.end(); I != E; ++I) { + if (I->getLabel() == Label) { + EmitCFIFrameMove(*I); + FoundOne = true; + } + } + assert(FoundOne); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); - + bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); - + // Print out code for the function. bool HasAnyRealCode = false; const MachineInstr *LastMI = 0; @@ -649,12 +662,15 @@ void AsmPrinter::EmitFunctionBody() { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->beginInstruction(II); } - + if (isVerbose()) EmitComments(*II, OutStreamer.GetCommentOS()); switch (II->getOpcode()) { case TargetOpcode::PROLOG_LABEL: + emitPrologLabel(*II); + break; + case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); @@ -675,10 +691,13 @@ void AsmPrinter::EmitFunctionBody() { if (isVerbose()) EmitKill(II, *this); break; default: + if (!TM.hasMCUseLoc()) + MCLineEntry::Make(&OutStreamer, getCurrentSection()); + EmitInstruction(II); break; } - + if (ShouldPrintDebugScopes) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->endInstruction(II); @@ -705,10 +724,10 @@ void AsmPrinter::EmitFunctionBody() { } else // Target not mc-ized yet. OutStreamer.EmitRawText(StringRef("\tnop\n")); } - + // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); - + // If the target wants a .size directive for the size of the function, emit // it. if (MAI->hasDotTypeDotSizeDirective()) { @@ -716,14 +735,14 @@ void AsmPrinter::EmitFunctionBody() { // difference between the function label and the temp label. MCSymbol *FnEndLabel = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(FnEndLabel); - + const MCExpr *SizeExp = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), MCSymbolRefExpr::Create(CurrentFnSym, OutContext), OutContext); OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } - + // Emit post-function debug information. if (DD) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -734,20 +753,71 @@ void AsmPrinter::EmitFunctionBody() { DE->EndFunction(); } MMI->EndFunction(); - + // Print out jump tables referenced by the function. EmitJumpTableInfo(); - + OutStreamer.AddBlankLine(); } /// getDebugValueLocation - Get location information encoded by DBG_VALUE /// operands. 
-MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { +MachineLocation AsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { // Target specific DBG_VALUE instructions are handled by each target. return MachineLocation(); } +/// getDwarfRegOpSize - get size required to emit given machine location using +/// dwarf encoding. +unsigned AsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(MLoc.getReg(), false); + if (int Offset = MLoc.getOffset()) { + // If the value is at a certain offset from frame register then + // use DW_OP_breg. + if (DWReg < 32) + return 1 + MCAsmInfo::getSLEB128Size(Offset); + else + return 1 + MCAsmInfo::getULEB128Size(MLoc.getReg()) + + MCAsmInfo::getSLEB128Size(Offset); + } + if (DWReg < 32) + return 1; + + return 1 + MCAsmInfo::getULEB128Size(DWReg); +} + +/// EmitDwarfRegOp - Emit dwarf register operation. +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + unsigned Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + if (int Offset = MLoc.getOffset()) { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); + EmitInt8(dwarf::DW_OP_breg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_bregx"); + EmitInt8(dwarf::DW_OP_bregx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + EmitSLEB128(Offset); + } else { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_regx"); + EmitInt8(dwarf::DW_OP_regx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + } +} + bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); @@ -782,7 +852,7 @@ bool AsmPrinter::doFinalization(Module &M) { } delete DD; DD = 0; } - + // If the target wants to know about weak references, print them all. if (MAI->getWeakRefDirective()) { // FIXME: This is not lazy, it would be nice to only print weak references @@ -796,7 +866,7 @@ bool AsmPrinter::doFinalization(Module &M) { if (!I->hasExternalWeakLinkage()) continue; OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); } - + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); @@ -822,7 +892,7 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, I->getVisibility()); // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, + OutStreamer.EmitAssignment(Name, MCSymbolRefExpr::Create(Target, OutContext)); } } @@ -839,14 +909,14 @@ bool AsmPrinter::doFinalization(Module &M) { if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer.SwitchSection(S); - + // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. 
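getDwarfRegOpSize above turns on two facts: DWARF reserves single-byte opcodes (DW_OP_reg0..reg31 and DW_OP_breg0..breg31) for the first 32 registers, and everything else pays one opcode byte plus LEB128-encoded operands. A sketch of the size computations behind the MCAsmInfo::getULEB128Size/getSLEB128Size calls it makes, reimplemented here for illustration (each LEB128 byte holds 7 payload bits plus a continuation bit):

    #include <stdint.h>

    static unsigned ULEB128Size(uint64_t Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;   // consume 7 payload bits per byte
        ++Size;
      } while (Value != 0);
      return Size;
    }

    static unsigned SLEB128Size(int64_t Value) {
      unsigned Size = 0;
      bool More = true;
      while (More) {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;   // arithmetic shift assumed (true on common compilers)
        // Stop once the remaining value is pure sign-extension of Byte.
        More = !((Value == 0 && (Byte & 0x40) == 0) ||
                 (Value == -1 && (Byte & 0x40) != 0));
        ++Size;
      }
      return Size;
    }

    // Example: DW_OP_bregx with register 40 at offset -8 costs
    // 1 + ULEB128Size(40) + SLEB128Size(-8) = 1 + 1 + 1 = 3 bytes,
    // versus a single opcode byte plus SLEB128(offset) for
    // DW_OP_breg0..DW_OP_breg31.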
EmitEndOfAsmFile(M); - + delete Mang; Mang = 0; MMI = 0; - + OutStreamer.Finish(); return false; } @@ -886,7 +956,7 @@ void AsmPrinter::EmitConstantPool() { for (unsigned i = 0, e = CP.size(); i != e; ++i) { const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - + SectionKind Kind; switch (CPE.getRelocationInfo()) { default: llvm_unreachable("Unknown section kind"); @@ -904,7 +974,7 @@ void AsmPrinter::EmitConstantPool() { } const MCSection *S = getObjFileLowering().getSectionForConstant(Kind); - + // The number of sections are small, just do a linear search from the // last section to the first. bool Found = false; @@ -953,7 +1023,7 @@ void AsmPrinter::EmitConstantPool() { } /// EmitJumpTableInfo - Print assembly representations of the jump tables used -/// by the current function to the current output stream. +/// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); @@ -962,7 +1032,7 @@ void AsmPrinter::EmitJumpTableInfo() { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; - // Pick the directive to use to print the jump table entries, and switch to + // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. const Function *F = MF->getFunction(); bool JTInDiffSection = false; @@ -978,18 +1048,18 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM)); } else { // Otherwise, drop it in the readonly section. - const MCSection *ReadOnlySection = + const MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); OutStreamer.SwitchSection(ReadOnlySection); JTInDiffSection = true; } EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); - + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - - // If this jump table was deleted, ignore it. + + // If this jump table was deleted, ignore it. if (JTBBs.empty()) continue; // For the EK_LabelDifference32 entry, if the target supports .set, emit a @@ -1003,15 +1073,15 @@ void AsmPrinter::EmitJumpTableInfo() { for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { const MachineBasicBlock *MBB = JTBBs[ii]; if (!EmittedSets.insert(MBB)) continue; - + // .set LJTSet, LBB32-base const MCExpr *LHS = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), MCBinaryExpr::CreateSub(LHS, Base, OutContext)); } - } - + } + // On some targets (e.g. Darwin) we want to emit two consecutive labels // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The @@ -1064,8 +1134,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // If the .set directive is supported, this is emitted as: // .set L4_5_set_123, LBB123 - LJTI1_2 // .word L4_5_set_123 - - // If we have emitted set directives for the jump table entries, print + + // If we have emitted set directives for the jump table entries, print // them rather than the entries themselves. If we're emitting PIC, then // emit the table entries as differences between two text section labels. 
if (MAI->hasSetDirective()) { @@ -1081,9 +1151,9 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, break; } } - + assert(Value && "Unknown entry kind!"); - + unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData()); OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); } @@ -1103,18 +1173,18 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getSection() == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; - + if (!GV->hasAppendingLinkage()) return false; assert(GV->hasInitializer() && "Not a special LLVM global!"); - + const TargetData *TD = TM.getTargetData(); unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); - + if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { StringRef Sym(".constructors_used"); @@ -1122,8 +1192,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { MCSA_Reference); } return true; - } - + } + if (GV->getName() == "llvm.global_dtors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); EmitAlignment(Align); @@ -1137,7 +1207,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } return true; } - + return false; } @@ -1148,7 +1218,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) { // Should be an array of 'i8*'. ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (InitList == 0) return; - + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); @@ -1157,7 +1227,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) { } } -/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the +/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the /// function pointers, ignoring the init priority. void AsmPrinter::EmitXXStructorList(Constant *List) { // Should be an array of '{ int, void ()* }' structs. The first value is the @@ -1203,11 +1273,11 @@ void AsmPrinter::EmitInt32(int Value) const { void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const { // Get the Hi-Lo expression. - const MCExpr *Diff = + const MCExpr *Diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext), MCSymbolRefExpr::Create(Lo, OutContext), OutContext); - + if (!MAI->hasSetDirective()) { OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/); return; @@ -1219,27 +1289,27 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); } -/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" +/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" /// where the size in bytes of the directive is specified by Size and Hi/Lo /// specify the labels. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, - const MCSymbol *Lo, unsigned Size) + const MCSymbol *Lo, unsigned Size) const { - + // Emit Hi+Offset - Lo // Get the Hi+Offset expression. 
const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), MCConstantExpr::Create(Offset, OutContext), OutContext); - + // Get the Hi+Offset-Lo expression. - const MCExpr *Diff = + const MCExpr *Diff = MCBinaryExpr::CreateSub(Plus, MCSymbolRefExpr::Create(Lo, OutContext), OutContext); - - if (!MAI->hasSetDirective()) + + if (!MAI->hasSetDirective()) OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); else { // Otherwise, emit with .set (aka assignment). @@ -1249,22 +1319,22 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, } } -/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" +/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size) + unsigned Size) const { - + // Emit Label+Offset const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), MCConstantExpr::Create(Offset, OutContext), OutContext); - + OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/); } - + //===----------------------------------------------------------------------===// @@ -1276,9 +1346,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); - + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. - + if (getCurrentSection()->getKind().isText()) OutStreamer.EmitCodeAlignment(1 << NumBits); else @@ -1293,25 +1363,25 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { /// static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { MCContext &Ctx = AP.OutContext; - + if (CV->isNullValue() || isa<UndefValue>(CV)) return MCConstantExpr::Create(0, Ctx); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) return MCConstantExpr::Create(CI->getZExtValue(), Ctx); - + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); - + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); if (CE == 0) { llvm_unreachable("Unknown constant value to lower!"); return MCConstantExpr::Create(0, Ctx); } - + switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding @@ -1339,21 +1409,21 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0], IdxVec.size()); - + const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); if (Offset == 0) return Base; - + // Truncate/sext the offset to the pointer size. if (TD.getPointerSizeInBits() != 64) { int SExtAmount = 64-TD.getPointerSizeInBits(); Offset = (Offset << SExtAmount) >> SExtAmount; } - + return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); } - + case Instruction::Trunc: // We emit the value and depend on the assembler to truncate the generated // expression properly. 
This is important for differences between @@ -1372,7 +1442,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { false/*ZExt*/); return LowerConstant(Op, AP); } - + case Instruction::PtrToInt: { const TargetData &TD = *AP.TM.getTargetData(); // Support only foldable casts to/from pointers that can be eliminated by @@ -1394,7 +1464,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } - + // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. case Instruction::Add: @@ -1435,7 +1505,7 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); return; } - + // Otherwise, it can be emitted as .ascii. SmallVector<char, 128> TmpVec; TmpVec.reserve(CA->getNumOperands()); @@ -1493,7 +1563,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); return; } - + if (CFP->getType()->isFloatTy()) { if (AP.isVerbose()) { float Val = CFP->getValueAPF().convertToFloat(); @@ -1503,7 +1573,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); return; } - + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // API needed to prevent premature destruction @@ -1518,7 +1588,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= " << DoubleVal.convertToDouble() << '\n'; } - + if (AP.TM.getTargetData()->isBigEndian()) { AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); @@ -1526,14 +1596,14 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); } - + // Emit the tail padding for the long double. const TargetData &TD = *AP.TM.getTargetData(); AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - TD.getTypeStoreSize(CFP->getType()), AddrSpace); return; } - + assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); // All long double variants are printed as hex @@ -1588,10 +1658,10 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, return; } } - + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) return EmitGlobalConstantArray(CVA, AddrSpace, AP); - + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) return EmitGlobalConstantStruct(CVS, AddrSpace, AP); @@ -1603,10 +1673,10 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } - + if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); - + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
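The PtrToInt/IntToPtr handling in LowerConstant above narrows a lowered sub-expression to InBits by And-ing with ~0ULL >> (64-InBits), a mask of InBits low ones. The trick in isolation (assuming 1 <= InBits <= 64, which the surrounding checks guarantee):

    #include <assert.h>
    #include <stdint.h>

    // Truncate Value to its low InBits bits, the way LowerConstant masks
    // a lowered expression when the destination type is narrower.
    static uint64_t truncToBits(uint64_t Value, unsigned InBits) {
      uint64_t Mask = ~0ULL >> (64 - InBits);   // InBits low ones
      return Value & Mask;
    }

    int main() {
      assert(truncToBits(0x1234567890ULL, 32) == 0x34567890ULL);
      assert(truncToBits(0xFFULL, 4) == 0xFULL);
      assert(truncToBits(0xDEADBEEFULL, 64) == 0xDEADBEEFULL);
      return 0;
    }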
AP.OutStreamer.EmitValue(LowerConstant(CV, AP), @@ -1703,7 +1773,7 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; Mang->getNameWithPrefix(NameStr, Sym); return OutContext.GetOrCreateSymbol(NameStr.str()); -} +} @@ -1740,10 +1810,10 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, // Add loop depth information const MachineLoop *Loop = LI->getLoopFor(&MBB); if (Loop == 0) return; - + MachineBasicBlock *Header = Loop->getHeader(); assert(Header && "No header for loop"); - + // If this block is not a loop header, just print out what is the loop header // and return. if (Header != &MBB) { @@ -1753,21 +1823,21 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, " Depth="+Twine(Loop->getLoopDepth())); return; } - + // Otherwise, it is a loop header. Print out information about child and // parent loops. raw_ostream &OS = AP.OutStreamer.GetCommentOS(); - - PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); - + + PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); + OS << "=>"; OS.indent(Loop->getLoopDepth()*2-2); - + OS << "This "; if (Loop->empty()) OS << "Inner "; OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n'; - + PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); } @@ -1788,7 +1858,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { const BasicBlock *BB = MBB->getBasicBlock(); if (isVerbose()) OutStreamer.AddComment("Block address taken"); - + std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB); for (unsigned i = 0, e = Syms.size(); i != e; ++i) @@ -1801,9 +1871,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { if (const BasicBlock *BB = MBB->getBasicBlock()) if (BB->hasName()) OutStreamer.AddComment("%" + BB->getName()); - + EmitBasicBlockLoopComments(*MBB, LI, *this); - + // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + Twine(MBB->getNumber()) + ":"); @@ -1823,7 +1893,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { MCSymbolAttr Attr = MCSA_Invalid; - + switch (Visibility) { default: break; case GlobalValue::HiddenVisibility: @@ -1850,23 +1920,23 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // then nothing falls through to it. if (MBB->isLandingPad() || MBB->pred_empty()) return false; - + // If there isn't exactly one predecessor, it can't be a fall through. MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; ++PI2; if (PI2 != MBB->pred_end()) return false; - + // The predecessor has to be immediately before this block. const MachineBasicBlock *Pred = *PI; - + if (!Pred->isLayoutSuccessor(MBB)) return false; - + // If the block is completely empty, then it definitely does fall through. if (Pred->empty()) return true; - + // Otherwise, check the last instruction. 
const MachineInstr &LastInst = Pred->back(); return !LastInst.getDesc().isBarrier(); @@ -1882,9 +1952,9 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { gcp_map_type::iterator GCPI = GCMap.find(S); if (GCPI != GCMap.end()) return GCPI->second; - + const char *Name = S->getName().c_str(); - + for (GCMetadataPrinterRegistry::iterator I = GCMetadataPrinterRegistry::begin(), E = GCMetadataPrinterRegistry::end(); I != E; ++I) @@ -1894,7 +1964,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { GCMap.insert(std::make_pair(S, GMP)); return GMP; } - + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); return 0; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 98a1bf2..6d1708a 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -155,7 +155,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer); + TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer); OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding)); } @@ -277,42 +277,42 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, } } -/// EmitFrameMoves - Emit frame instructions to describe the layout of the -/// frame. -void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const { +/// EmitFrameMoves - Emit a frame instruction. +void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); - int stackGrowth = TM.getTargetData()->getPointerSize(); - if (TM.getFrameLowering()->getStackGrowthDirection() != - TargetFrameLowering::StackGrowsUp) - stackGrowth *= -1; + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // If advancing cfa. + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + assert(!Src.isReg() && "Machine move not supported yet."); + if (Src.getReg() == MachineLocation::VirtualFP) { + OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); + } else { + assert("Machine move not supported yet"); + // Reg + Offset + } + } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(!Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), + Dst.getOffset()); + } +} + +/// EmitFrameMoves - Emit frame instructions to describe the layout of the +/// frame. +void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const { for (unsigned i = 0, N = Moves.size(); i < N; ++i) { const MachineMove &Move = Moves[i]; MCSymbol *Label = Move.getLabel(); // Throw out move if the label is invalid. if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. 
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - assert(!Src.isReg() && "Machine move not supported yet."); - - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - assert("Machine move not supported yet"); - // Reg + Offset - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); - } + EmitCFIFrameMove(Move); } } diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 306efad..d2be552 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -1,9 +1,11 @@ add_llvm_library(LLVMAsmPrinter + ARMException.cpp AsmPrinter.cpp AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp DwarfCFIException.cpp + DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp DwarfTableException.cpp diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index d56c094..7d61f1e 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -70,7 +70,6 @@ namespace llvm { public: DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {} - virtual ~DIEAbbrev() {} // Accessors. unsigned getTag() const { return Tag; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 68be2ee..dbd52c4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -40,7 +41,7 @@ using namespace llvm; DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) {} DwarfCFIException::~DwarfCFIException() {} @@ -51,81 +52,85 @@ void DwarfCFIException::EndModule() { if (!Asm->MAI->isExceptionHandlingDwarf()) return; - if (!shouldEmitTableModule) - return; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel) + return; // Emit references to all used personality functions + bool AtLeastOne = false; const std::vector<const Function*> &Personalities = MMI->getPersonalities(); for (size_t i = 0, e = Personalities.size(); i != e; ++i) { - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i)); - Asm->EmitReference(Personalities[i], PerEncoding); + if (!Personalities[i]) + continue; + MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); + AtLeastOne = true; + } + + if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) { + // This is a temporary hack to keep sections in the same order they + // were before. This lets us produce bit identical outputs while + // transitioning to CFI. 
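EmitCFIFrameMove above dispatches each MachineMove to one of three streamer directives. As a concrete illustration, here is a hypothetical x86-64-style frame-pointer prologue expressed as the streamer calls each case selects; the register number and offsets are made up, but the MCStreamer methods are the ones used in the hunk above:

    #include "llvm/MC/MCStreamer.h"

    // The usual three moves of a frame-pointer prologue, in the order a
    // target would record them, and the directive each case maps to.
    static void emitTypicalPrologueCFI(llvm::MCStreamer &OS,
                                       unsigned FPDwarfReg) {
      // Pushing the old frame pointer advances the CFA:
      // Dst == VirtualFP, Src == VirtualFP + offset  ->  def_cfa_offset.
      OS.EmitCFIDefCfaOffset(16);
      // The saved register itself lands at CFA-16  ->  cfi_offset.
      OS.EmitCFIOffset(FPDwarfReg, -16);
      // "mov fp, sp" re-bases the CFA on the frame pointer:
      // Src == VirtualFP, Dst is a register  ->  def_cfa_register.
      OS.EmitCFIDefCfaRegister(FPDwarfReg);
    }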
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); } } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfCFIException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. - shouldEmitTable = !MMI->getLandingPads().empty(); + bool hasLandingPads = !MMI->getLandingPads().empty(); // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; + shouldEmitMoves = Asm->needsCFIMoves(); - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - shouldEmitTableModule |= shouldEmitTable; + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; - if (shouldEmitMoves) { - const TargetFrameLowering *TFL = Asm->TM.getFrameLowering(); - Asm->OutStreamer.EmitCFIStartProc(); + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; - // Indicate locations of general callee saved registers in frame. - std::vector<MachineMove> Moves; - TFL->getInitialFrameState(Moves); - Asm->EmitCFIFrameMoves(Moves); - Asm->EmitCFIFrameMoves(MMI->getFrameMoves()); - } + if (!shouldEmitPersonality && !shouldEmitMoves) + return; - if (!shouldEmitTable) + Asm->OutStreamer.EmitCFIStartProc(); + + // Indicate personality routine, if any. + if (!shouldEmitPersonality) return; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); // Provide LSDA information. - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - if (LSDAEncoding != dwarf::DW_EH_PE_omit) - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + if (!shouldEmitLSDA) + return; - // Indicate personality routine, if any. - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - if (PerEncoding != dwarf::DW_EH_PE_omit && - MMI->getPersonalities()[MMI->getPersonalityIndex()]) - Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality", - MMI->getPersonalityIndex()), - PerEncoding); + Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber()), + LSDAEncoding); } /// EndFunction - Gather and emit post-function exception information. /// void DwarfCFIException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; + if (!shouldEmitPersonality && !shouldEmitMoves) + return; - if (shouldEmitMoves) - Asm->OutStreamer.EmitCFIEndProc(); + Asm->OutStreamer.EmitCFIEndProc(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); @@ -133,6 +138,6 @@ void DwarfCFIException::EndFunction() { // Map all labels and get rid of any dead landing pads. 
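The rewritten BeginFunction above boils down to three independent decisions. Pulled out as a tiny pure function for illustration (the struct and parameter names are hypothetical; the logic follows the code):

    struct EHChoices { bool Moves, Personality, LSDA; };

    // Mirrors the three shouldEmit* flags computed in BeginFunction:
    // frame moves whenever CFI is needed, a personality only for
    // functions with surviving landing pads and a non-omitted encoding,
    // and an LSDA only when a personality is emitted.
    // (DW_EH_PE_omit is 0xff in DWARF.)
    static EHChoices decideEH(bool needsCFIMoves, bool hasLandingPads,
                              bool havePersonalityFn,
                              unsigned PerEncoding, unsigned LSDAEncoding) {
      const unsigned DW_EH_PE_omit = 0xff;
      EHChoices C;
      C.Moves = needsCFIMoves;
      C.Personality = hasLandingPads && havePersonalityFn &&
                      PerEncoding != DW_EH_PE_omit;
      C.LSDA = C.Personality && LSDAEncoding != DW_EH_PE_omit;
      return C;
    }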
MMI->TidyLandingPads(); - if (shouldEmitTable) + if (shouldEmitPersonality) EmitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 0000000..7ce0cfe --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,983 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +/// CompileUnit - Compile unit constructor. +CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW) + : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); +} + +/// ~CompileUnit - Destructor for compile unit. +CompileUnit::~CompileUnit() { + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); +} + +/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug +/// information entry. +DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { + DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); + return Value; +} + +/// addUInt - Add an unsigned integer attribute data and value. +/// +void CompileUnit::addUInt(DIE *Die, unsigned Attribute, + unsigned Form, uint64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? + DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addSInt - Add an signed integer attribute data and value. +/// +void CompileUnit::addSInt(DIE *Die, unsigned Attribute, + unsigned Form, int64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(true, Integer); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addString - Add a string attribute data and value. DIEString only +/// keeps string reference. +void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, + StringRef String) { + DIEValue *Value = new (DIEValueAllocator) DIEString(String); + Die->addValue(Attribute, Form, Value); +} + +/// addLabel - Add a Dwarf label attribute data and value. +/// +void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, Form, Value); +} + +/// addDelta - Add a label delta attribute data and value. +/// +void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die->addValue(Attribute, Form, Value); +} + +/// addDIEEntry - Add a DIE attribute data and value. 
+/// +void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, + DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); +} + + +/// addBlock - Add block data. +/// +void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die->addValue(Attribute, Block->BestForm(), Block); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { + // Verify variable. + if (!V.Verify()) + return; + + unsigned Line = V.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { + // Verify global variable. + if (!G.Verify()) + return; + + unsigned Line = G.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(), + G.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.Verify()) + return; + // If the line number is 0, don't add it. + if (SP.getLineNumber() == 0) + return; + + unsigned Line = SP.getLineNumber(); + if (!SP.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0 || !Ty.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. + if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; + StringRef FN = NS.getFilename(); + + unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. 
+void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die,
+ MachineLocation Location) {
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// addRegisterOp - Add register operand.
+void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+}
+
+/// addRegisterOffset - Add register offset.
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg,
+ int64_t Offset) {
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+ if (Reg == RI->getFrameRegister(*Asm->MF))
+ // If the variable's offset is based on the frame register, use fbreg.
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ else if (DWReg < 32)
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg);
+ else {
+ addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+ addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg);
+ }
+ addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset);
+}
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void CompileUnit::addAddress(DIE *Die, unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, 0, Block);
+}
+
+/// addComplexAddress - Start with the address based on the location provided,
+/// and generate the DWARF information necessary to find the actual variable,
+/// given the extra address information encoded in the DIVariable. Add the
+/// DWARF information to the die.
+///
+void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
+ unsigned Attribute,
+ const MachineLocation &Location) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ unsigned N = DV->getNumAddrElements();
+ unsigned i = 0;
+ if (Location.isReg()) {
+ if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) {
+ // If the first address element is OpPlus, emit
+ // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1));
+ i = 2;
+ } else
+ addRegisterOp(Block, Location.getReg());
+ }
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+ for (; i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref) {
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ } else llvm_unreachable("unknown DIBuilder Opcode");
+ }
+
+ // Now attach the location information to the DIE.
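The opcode split in addRegisterOp and addRegisterOffset above mirrors the DWARF encoding: the first 32 registers get the single-byte opcodes DW_OP_reg0..DW_OP_reg31 (0x50..0x6f) and DW_OP_breg0..DW_OP_breg31 (0x70..0x8f), while larger register numbers must use DW_OP_regx/DW_OP_bregx followed by a ULEB128-encoded register number. A minimal self-contained sketch of the same rule; encodeRegLocation and encodeULEB128 are hypothetical helpers written for illustration, not part of this patch:

  #include <cstdint>
  #include <vector>

  // Emit V as ULEB128: 7 bits per byte, high bit set while more bytes follow.
  static void encodeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
    do {
      uint8_t Byte = V & 0x7f;
      V >>= 7;
      if (V) Byte |= 0x80;
      Out.push_back(Byte);
    } while (V);
  }

  // Encode a "variable lives in register DWReg" location expression.
  static std::vector<uint8_t> encodeRegLocation(unsigned DWReg) {
    std::vector<uint8_t> Expr;
    if (DWReg < 32) {
      Expr.push_back(0x50 + DWReg);  // DW_OP_reg0 + n: one byte total.
    } else {
      Expr.push_back(0x90);          // DW_OP_regx, then ULEB128 reg number.
      encodeULEB128(DWReg, Expr);
    }
    return Expr;  // e.g. reg 5 -> {0x55}; reg 40 -> {0x90, 0x28}
  }

Keeping the common case to a single byte matters because a location expression is emitted per variable, so .debug_info size is sensitive to it.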
+ addBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... <various fields> + struct __Block_byref_x_VarName *forwarding; + ... <various other fields> + SomeType VarName; + ... <maybe more fields> + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst <forward_fld_offset> + DW_OP_deref + DW_OP_plus_uconst <varName_fld_offset> + + That is what this function does. */ + +/// addBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. +/// +void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIType Ty = DV->getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + StringRef varName = DV->getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. 
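To make the recipe in the comment above concrete: with purely hypothetical offsets, say the __forwarding field at byte offset 8 and the variable's field at byte offset 24, and a starting location that is a pointer to the struct, the bytes appended to the location expression would be:

  // Hypothetical offsets (8 and 24), for illustration only.
  static const unsigned char ByrefSuffix[] = {
    0x06,        // DW_OP_deref          : we started with a pointer
    0x23, 0x08,  // DW_OP_plus_uconst 8  : offset of '__forwarding'
    0x06,        // DW_OP_deref          : follow the forwarding pointer
    0x23, 0x18,  // DW_OP_plus_uconst 24 : offset of the field 'VarName'
  };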
+ DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + StringRef fieldName = DT.getName(); + if (fieldName == "__forwarding") + forwardingField = Element; + else if (fieldName == varName) + varField = Element; + } + + // Get the offsets for the forwarding field and the variable field. + unsigned forwardingFieldOffset = + DIDerivedType(forwardingField).getOffsetInBits() >> 3; + unsigned varFieldOffset = + DIDerivedType(varField).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned Imm = MO.getImm(); + addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. 
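The byte loop that follows (and its twin in addConstantValue below) walks the constant's storage forward on little-endian targets and backward on big-endian ones, so the bytes land in the .debug_info stream in target memory order. A self-contained sketch of the same traversal; it indexes raw host-memory bytes, so the byte values in the comment assume a little-endian host:

  #include <cstdint>
  #include <cstdio>

  // Print the bytes of a 64-bit constant in the requested byte order.
  static void emitBytes(uint64_t Val, bool LittleEndian) {
    const char *Ptr = (const char *)&Val;
    int NumBytes = 8;
    int Incr = LittleEndian ? 1 : -1;
    int Start = LittleEndian ? 0 : NumBytes - 1;
    int Stop = LittleEndian ? NumBytes : -1;
    for (; Start != Stop; Start += Incr) {
      unsigned Byte = (unsigned char)Ptr[Start];
      printf("%02x ", Byte);
    }
    printf("\n");
    // emitBytes(0x0123456789abcdefULL, true) prints
    // "ef cd ab 89 67 45 23 01": low-order byte first.
  }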
+ const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI, + bool Unsigned) { + if (CI->getBitWidth() <= 64) { + if (Unsigned) + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + CI->getZExtValue()); + else + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + CI->getSExtValue()); + return true; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const APInt Val = CI->getValue(); + const char *Ptr = (const char*)Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & Ptr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addTemplateParams - Add template parameters in buffer. +void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { + // Add template parameters. + for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + Buffer.addChild(getOrCreateTemplateTypeParameterDIE( + DITemplateTypeParameter(Element))); + else if (Element.isTemplateValueParameter()) + Buffer.addChild(getOrCreateTemplateValueParameterDIE( + DITemplateValueParameter(Element))); + } + +} +/// addToContextOwner - Add Die into the list of its context owner's children. +void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (Context.isType()) { + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); + ContextDIE->addChild(Die); + } else if (Context.isNameSpace()) { + DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); + ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context)); + ContextDIE->addChild(Die); + } else if (DIE *ContextDIE = getDIE(Context)) + ContextDIE->addChild(Die); + else + addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) { + DIE *TyDIE = getDIE(Ty); + if (TyDIE) + return TyDIE; + + // Create new type. 
+ TyDIE = new DIE(dwarf::DW_TAG_base_type); + insertDIE(Ty, TyDIE); + if (Ty.isBasicType()) + constructTypeDIE(*TyDIE, DIBasicType(Ty)); + else if (Ty.isCompositeType()) + constructTypeDIE(*TyDIE, DICompositeType(Ty)); + else { + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + constructTypeDIE(*TyDIE, DIDerivedType(Ty)); + } + + addToContextOwner(TyDIE, Ty.getContext()); + return TyDIE; +} + +/// addType - Add a new type attribute to the specified entity. +void CompileUnit::addType(DIE *Entity, DIType Ty) { + if (!Ty.Verify()) + return; + + // Check for pre-existence. + DIEEntry *Entry = getDIEEntry(Ty); + // If it exists then use the existing value. + if (Entry) { + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + return; + } + + // Construct type. + DIE *Buffer = getOrCreateTypeDIE(Ty); + + // Set up proxy. + Entry = createDIEEntry(Buffer); + insertDIEEntry(Ty, Entry); + + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); +} + +/// constructTypeDIE - Construct basic type die from DIBasicType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { + // Get core information. + StringRef Name = BTy.getName(); + Buffer.setTag(dwarf::DW_TAG_base_type); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + uint64_t Size = BTy.getSizeInBits() >> 3; + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); +} + +/// constructTypeDIE - Construct derived type die from DIDerivedType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { + // Get core information. + StringRef Name = DTy.getName(); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + addType(&Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + addSourceLine(&Buffer, DTy); +} + +/// constructTypeDIE - Construct type DIE from DICompositeType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Get core information. + StringRef Name = CTy.getName(); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIDescriptor Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); + Buffer.addChild(ElemDie); + } + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. 
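For DW_TAG_subroutine_type the type array puts the return type at element 0 and the formal parameter types after it, which is why the loop below starts at index 1; an unspecified parameter (a C-style "...") becomes a DW_TAG_unspecified_parameters child and clears the prototyped flag. Schematically, a hypothetical `int f(char, ...)` yields a DIE shaped like:

  DW_TAG_subroutine_type
    DW_AT_type -> 'int'                 // element 0: the return type
    DW_TAG_formal_parameter
      DW_AT_type -> 'char'              // element 1
    DW_TAG_unspecified_parameters       // element 2: the "..."

and, under this scheme, no DW_AT_prototyped attribute, since the "..." cleared isPrototyped.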
+ DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + addType(&Buffer, DIType(RTy)); + + bool isPrototyped = true; + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Ty = Elements.getElement(i); + if (Ty.isUnspecifiedParameter()) { + DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); + Buffer.addChild(Arg); + isPrototyped = false; + } else { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Ty)); + Buffer.addChild(Arg); + } + } + // Add prototype flag. + if (isPrototyped) + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. + DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + unsigned N = Elements.getNumElements(); + if (N == 0) + break; + + // Add elements to structure type. + for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.isSubprogram()) { + DISubprogram SP(Element); + ElemDie = DD->createSubprogramDIE(DISubprogram(Element)); + if (SP.isProtected()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + else + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (SP.isExplicit()) + addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + } + else if (Element.isVariable()) { + DIVariable DV(Element); + ElemDie = new DIE(dwarf::DW_TAG_variable); + addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + DV.getName()); + addType(ElemDie, DV.getType()); + addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(ElemDie, DV); + } else if (Element.isDerivedType()) + ElemDie = createMemberDIE(DIDerivedType(Element)); + else + continue; + Buffer.addChild(ElemDie); + } + + if (CTy.isAppleBlockExtension()) + addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + + DICompositeType ContainingType = CTy.getContainingType(); + if (DIDescriptor(ContainingType).isCompositeType()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } + + if (Tag == dwarf::DW_TAG_class_type) + addTemplateParams(Buffer, CTy.getTemplateParams()); + + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else { + // Add zero size if it is not a forward declaration. 
+ if (CTy.isForwardDecl())
+ addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ else
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+ }
+
+ // Add source line info if available.
+ if (!CTy.isForwardDecl())
+ addSourceLine(&Buffer, CTy);
+ }
+}
+
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+ DIE *ParamDIE = getDIE(TP);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+ addType(ParamDIE, TP.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+ DIE *ParamDIE = getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ if (!TPV.getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
+ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+ int64_t L = SR.getLo();
+ int64_t H = SR.getHi();
+
+ // The L value is the lower bound, which is typically zero for C/C++. The
+ // H value is the upper bound. Values are 64 bit. H - L + 1 is the size
+ // of the array. If L > H, do not emit the DW_AT_lower_bound and
+ // DW_AT_upper_bound attributes. If L and H are both zero, the array has
+ // one element; in that case do not emit a lower bound.
+
+ if (L > H) {
+ Buffer.addChild(DW_Subrange);
+ return;
+ }
+ if (L)
+ addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
+ DICompositeType *CTy) {
+ Buffer.setTag(dwarf::DW_TAG_array_type);
+ if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+ addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+ // Emit derived type.
+ addType(&Buffer, CTy->getTypeDerivedFrom());
+ DIArray Elements = CTy->getTypeArray();
+
+ // Get an anonymous type for index type.
+ DIE *IdxTy = getIndexTyDie();
+ if (!IdxTy) {
+ // Construct an anonymous type for index type.
+ IdxTy = new DIE(dwarf::DW_TAG_base_type);
+ addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+ addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_signed);
+ addDie(IdxTy);
+ setIndexTyDie(IdxTy);
+ }
+
+ // Add subranges to array type.
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+ }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
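A few concrete shapes for the bound handling in constructSubrangeDIE above (the element count is H - L + 1 when bounds are emitted; the Lo/Hi values are illustrative of what a front end might record):

  int a[10]         Lo = 0, Hi = 9    ->  DW_AT_upper_bound 9 (zero lower bound omitted)
  dimension [1..5]  Lo = 1, Hi = 5    ->  DW_AT_lower_bound 1, DW_AT_upper_bound 5
  int a[]           Lo = 0, Hi = -1   ->  L > H: no bound attributes (count unknown)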
+DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
+ DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+ StringRef Name = ETy.getName();
+ addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ int64_t Value = ETy.getEnumValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+ return Enumerator;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
+ DIE *MemberDie = new DIE(DT.getTag());
+ StringRef Name = DT.getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+ addType(MemberDie, DT.getTypeDerivedFrom());
+
+ addSourceLine(MemberDie, DT);
+
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+ uint64_t Size = DT.getSizeInBits();
+ uint64_t FieldSize = DT.getOriginalTypeSize();
+
+ if (Size != FieldSize) {
+ // Handle bitfield.
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+ uint64_t Offset = DT.getOffsetInBits();
+ uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getTargetData().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+ // Here DW_AT_data_member_location points to the anonymous
+ // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+ } else
+ // This is not a bitfield.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+ if (DT.getTag() == dwarf::DW_TAG_inheritance
+ && DT.isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the
+ // following expression to extract the appropriate offset from the vtable.
+ // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ VBaseLocationDie);
+ } else
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+ if (DT.isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (DT.isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ members and base classes are considered public.
+ else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ dwarf::DW_VIRTUALITY_virtual);
+
+ // Objective-C properties.
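A worked example of the bitfield arithmetic in createMemberDIE above, assuming a hypothetical field `unsigned f : 3;` recorded 10 bits into its struct and stored in a 32-bit unit aligned to 32 bits:

  Size        = 3                        // getSizeInBits()
  FieldSize   = 32                       // getOriginalTypeSize()
  Offset      = 10                       // getOffsetInBits()
  AlignMask   = ~(32 - 1)
  HiMark      = (10 + 32) & AlignMask    // = 32
  FieldOffset = 32 - 32                  // = 0 -> DW_AT_data_member_location 0
  Offset      = 10 - 0                   // = 10
  // little-endian: Offset = 32 - (10 + 3) = 19 -> DW_AT_bit_offset 19
  // big-endian:    Offset stays 10             -> DW_AT_bit_offset 10
  // plus DW_AT_byte_size 4 and DW_AT_bit_size 3

The flip on little-endian targets is needed because DW_AT_bit_offset (DWARF 2/3) counts from the most significant bit of the storage unit.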
+ StringRef PropertyName = DT.getObjCPropertyName();
+ if (!PropertyName.empty()) {
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
+ PropertyName);
+ StringRef GetterName = DT.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
+ dwarf::DW_FORM_string, GetterName);
+ StringRef SetterName = DT.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
+ dwarf::DW_FORM_string, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (DT.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (DT.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (DT.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (DT.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (DT.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (DT.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+ }
+ return MemberDie;
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 0000000..f4f6fb8
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,282 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/OwningPtr.h"
+
+namespace llvm {
+
+class DwarfDebug;
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class DbgVariable;
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+ /// ID - File identifier for source.
+ ///
+ unsigned ID;
+
+ /// Die - Compile unit debug information entry.
+ ///
+ const OwningPtr<DIE> CUDie;
+
+ /// Asm - Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ DwarfDebug *DD;
+
+ /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+ DIE *IndexTyDie;
+
+ /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+ /// variables to debug information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+ /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+ /// descriptors to debug information entries using a DIEEntry proxy.
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+ /// Globals - A map of globally visible named entities for this unit.
+ ///
+ StringMap<DIE*> Globals;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ ///
+ StringMap<DIE*> GlobalTypes;
+
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+public:
+ CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ ~CompileUnit();
+
+ // Accessors.
+ unsigned getID() const { return ID; }
+ DIE* getCUDie() const { return CUDie.get(); }
+ const StringMap<DIE*> &getGlobals() const { return Globals; }
+ const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// hasContent - Return true if this compile unit has something to write out.
+ ///
+ bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+ /// addGlobal - Add a new global entity to the compile unit.
+ ///
+ void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+ /// addGlobalType - Add a new global type to the compile unit.
+ ///
+ void addGlobalType(StringRef Name, DIE *Die) {
+ GlobalTypes[Name] = Die;
+ }
+
+ /// getDIE - Returns the debug information entry map slot for the
+ /// specified debug variable.
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+ DIEBlock *getDIEBlock() {
+ return new (DIEValueAllocator) DIEBlock();
+ }
+
+ /// insertDIE - Insert DIE into the map.
+ void insertDIE(const MDNode *N, DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(N, D));
+ }
+
+ /// getDIEEntry - Returns the debug information entry for the specified
+ /// debug variable.
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
+ return NULL;
+ return I->second;
+ }
+
+ /// insertDIEEntry - Insert debug information entry into the map.
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+ }
+
+ /// addDie - Adds or interns the DIE to the compile unit.
+ ///
+ void addDie(DIE *Buffer) {
+ this->CUDie->addChild(Buffer);
+ }
+
+ // getIndexTyDie - Get an anonymous type for index type.
+ DIE *getIndexTyDie() {
+ return IndexTyDie;
+ }
+
+ // setIndexTyDie - Set D as anonymous type for index which can be reused
+ // later.
+ void setIndexTyDie(DIE *D) {
+ IndexTyDie = D;
+ }
+public:
+
+ /// addUInt - Add an unsigned integer attribute data and value.
+ ///
+ void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+ /// addSInt - Add a signed integer attribute data and value.
+ ///
+ void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+ /// addString - Add a string attribute data and value.
+ ///
+ void addString(DIE *Die, unsigned Attribute, unsigned Form,
+ const StringRef Str);
+
+ /// addLabel - Add a Dwarf label attribute data and value.
+ ///
+ void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Label);
+
+ /// addDelta - Add a label delta attribute data and value.
+ ///
+ void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// addDIEEntry - Add a DIE attribute data and value.
+ ///
+ void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+ /// addBlock - Add block data.
+ ///
+ void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+ /// addSourceLine - Add location information to specified debug information
+ /// entry.
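The two maps declared above cooperate: MDNodeToDieMap memoizes the DIE built for a metadata node (getDIE/insertDIE), while MDNodeToDIEEntryMap caches the DIEEntry proxy handed to attribute consumers, which is what lets addType emit a ref4 reference to a type DIE that is still being constructed and so tolerate self-referential types. A minimal sketch of the lookup-then-create pattern used by getOrCreateTypeDIE; getOrCreate is a hypothetical free function, not part of this interface:

  // Memoize before populating children so recursive references
  // back to N find the partially built DIE instead of recursing forever.
  DIE *getOrCreate(CompileUnit &CU, const MDNode *N, unsigned Tag) {
    if (DIE *Existing = CU.getDIE(N))
      return Existing;
    DIE *D = new DIE(Tag);
    CU.insertDIE(N, D);
    // ... populate D's attributes and children here ...
    return D;
  }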
+ void addSourceLine(DIE *Die, DIVariable V); + void addSourceLine(DIE *Die, DIGlobalVariable G); + void addSourceLine(DIE *Die, DISubprogram SP); + void addSourceLine(DIE *Die, DIType Ty); + void addSourceLine(DIE *Die, DINameSpace NS); + + /// addAddress - Add an address attribute to a die based on the location + /// provided. + void addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addConstantValue - Add constant value entry in variable DIE. + bool addConstantValue(DIE *Die, const MachineOperand &MO); + bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); + + /// addConstantFPValue - Add constant value entry in variable DIE. + bool addConstantFPValue(DIE *Die, const MachineOperand &MO); + + /// addTemplateParams - Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DIArray TParams); + + /// addRegisterOp - Add register operand. + void addRegisterOp(DIE *TheDie, unsigned Reg); + + /// addRegisterOffset - Add register offset. + void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + + /// addComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of addComplexAddress. + /// addBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use addComplexAddress instead. + /// + void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addVariableAddress - Add DW_AT_location attribute for a + /// DbgVariable based on provided MachineLocation. + void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); + + /// addToContextOwner - Add Die into the list of its context owner's children. + void addToContextOwner(DIE *Die, DIDescriptor Context); + + /// addType - Add a new type attribute to the specified entity. + void addType(DIE *Entity, DIType Ty); + + /// getOrCreateNameSpace - Create a DIE for DINameSpace. + DIE *getOrCreateNameSpace(DINameSpace NS); + + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the + /// given DIType. + DIE *getOrCreateTypeDIE(DIType Ty); + + /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateTypeParameter. + DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateValueParameter. + DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + void addPubTypes(DISubprogram SP); + + /// constructTypeDIE - Construct basic type die from DIBasicType. + void constructTypeDIE(DIE &Buffer, + DIBasicType BTy); + + /// constructTypeDIE - Construct derived type die from DIDerivedType. 
+ void constructTypeDIE(DIE &Buffer, + DIDerivedType DTy); + + /// constructTypeDIE - Construct type DIE from DICompositeType. + void constructTypeDIE(DIE &Buffer, + DICompositeType CTy); + + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. + void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); + + /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. + void constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy); + + /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. + DIE *constructEnumTypeDIE(DIEnumerator ETy); + + /// createMemberDIE - Create new member DIE. + DIE *createMemberDIE(DIDerivedType DT); + +private: + + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + DIEInteger *DIEIntegerOne; +}; + +} // end llvm namespace +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 780fa40..26da800 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" @@ -52,7 +53,7 @@ static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::desc("Disable debug info printing")); static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absense of debug location information explicit."), + cl::desc("Make an absence of debug location information explicit."), cl::init(false)); #ifndef NDEBUG @@ -72,189 +73,56 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { -//===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate -/// with a source file. -class CompileUnit { - /// ID - File identifier for source. - /// - unsigned ID; - - /// Die - Compile unit debug information entry. - /// - const OwningPtr<DIE> CUDie; - - /// IndexTyDie - An anonymous type for index type. Owned by CUDie. - DIE *IndexTyDie; - - /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton - /// variables to debug information entries. - DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton - /// descriptors to debug information entries using a DIEEntry proxy. - DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap<DIE*> Globals; - - /// GlobalTypes - A map of globally visible types for this unit. - /// - StringMap<DIE*> GlobalTypes; - -public: - CompileUnit(unsigned I, DIE *D) - : ID(I), CUDie(D), IndexTyDie(0) {} - - // Accessors. - unsigned getID() const { return ID; } - DIE* getCUDie() const { return CUDie.get(); } - const StringMap<DIE*> &getGlobals() const { return Globals; } - const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } - - /// hasContent - Return true if this compile unit has something to write out. - /// - bool hasContent() const { return !CUDie->getChildren().empty(); } - - /// addGlobal - Add a new global entity to the compile unit. - /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - - /// addGlobalType - Add a new global type to the compile unit. 
- /// - void addGlobalType(StringRef Name, DIE *Die) { - GlobalTypes[Name] = Die; - } - - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } - - /// insertDIE - Insert DIE into the map. - void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } - - /// getDIEEntry - Returns the debug information entry for the speciefied - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; - } - - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } - - /// addDie - Adds or interns the DIE to the compile unit. - /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } - -}; - -//===----------------------------------------------------------------------===// -/// DbgVariable - This class is used to track local variable information. -/// -class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. -public: - // AbsVar may be NULL. - DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} - - // Accessors. - DIVariable getVariable() const { return Var; } - void setDIE(DIE *D) { TheDIE = D; } - DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } - unsigned getTag() const { return Var.getTag(); } - bool variableHasComplexAddress() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.hasComplexAddress(); - } - bool isBlockByrefVariable() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(); - } - unsigned getNumAddrElements() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.getNumAddrElements(); - } - uint64_t getAddrElement(unsigned i) const { - return Var.getAddrElement(i); - } - DIType getType() const { - DIType Ty = Var.getType(); - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (Var.isBlockByrefVariable()) { - /* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. - - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 
'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. - - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. */ - DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } - - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); - - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); - } - return Ty; +DIType DbgVariable::getType() const { + DIType Ty = Var.getType(); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (Var.isBlockByrefVariable()) { + /* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. + + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. 
*/ + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType); + DIArray Elements = blockStruct.getTypeArray(); + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + if (getName() == DT.getName()) + return (DT.getTypeDerivedFrom()); } return Ty; } -}; + return Ty; +} //===----------------------------------------------------------------------===// /// DbgRange - This is used to track range of instructions with identical @@ -396,15 +264,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); } } DwarfDebug::~DwarfDebug() { - for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) - DIEBlocks[j]->~DIEBlock(); } MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { @@ -439,852 +304,6 @@ void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { } } -/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug -/// information entry. -DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { - DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); - return Value; -} - -/// addUInt - Add an unsigned integer attribute data and value. -/// -void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addSInt - Add an signed integer attribute data and value. -/// -void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. -void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); -} - -/// addLabel - Add a Dwarf label attribute data and value. -/// -void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, Form, Value); -} - -/// addDelta - Add a label delta attribute data and value. -/// -void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); -} - -/// addDIEEntry - Add a DIE attribute data and value. -/// -void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, - DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); -} - - -/// addBlock - Add block data. 
-/// -void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, - DIEBlock *Block) { - Block->ComputeSize(Asm); - DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. - Die->addValue(Attribute, Block->BestForm(), Block); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { - // Verify variable. - if (!V.Verify()) - return; - - unsigned Line = V.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { - // Verify global variable. - if (!G.Verify()) - return; - - unsigned Line = G.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { - // Verify subprogram. - if (!SP.Verify()) - return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; - - unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(SP.getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { - // Verify type. - if (!Ty.Verify()) - return; - - unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(Ty.getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { - // Verify namespace. - if (!NS.Verify()) - return; - - unsigned Line = NS.getLineNumber(); - if (Line == 0) - return; - StringRef FN = NS.getFilename(); - - unsigned FileID = GetOrCreateSourceID(FN); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based -/// on provided frame index. 
-void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { - MachineLocation Location; - unsigned FrameReg; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - Location.set(FrameReg, Offset); - - if (DV->variableHasComplexAddress()) - addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) - addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); - else - addAddress(Die, dwarf::DW_AT_location, Location); -} - -/// addComplexAddress - Start with the address based on the location provided, -/// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DIVariable, starting from -/// the starting location. Add the DWARF information to the die. -/// -void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - - // Decode the original location, and use that as the start of the byref - // variable's location. - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); - - if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/* Byref variables, in Blocks, are declared by the programmer as "SomeType - VarName;", but the compiler creates a __Block_byref_x_VarName struct, and - gives the variable VarName either the struct, or a pointer to the struct, as - its type. This is necessary for various behind-the-scenes things the - compiler needs to do with by-reference variables in Blocks. - - However, as far as the original *programmer* is concerned, the variable - should still have type 'SomeType', as originally declared. - - The function getBlockByrefType dives into the __Block_byref_x_VarName - struct to find the original type of the variable, which is then assigned to - the variable's Debug Information Entry as its real type. So far, so good. - However now the debugger will expect the variable VarName to have the type - SomeType. So we need the location attribute for the variable to be an - expression that explains to the debugger how to navigate through the - pointers and struct to find the actual variable of type SomeType. - - The following function does just that. We start by getting - the "normal" location for the variable. 
This will be the location - of either the struct __Block_byref_x_VarName or the pointer to the - struct __Block_byref_x_VarName. - - The struct will look something like: - - struct __Block_byref_x_VarName { - ... <various fields> - struct __Block_byref_x_VarName *forwarding; - ... <various other fields> - SomeType VarName; - ... <maybe more fields> - }; - - If we are given the struct directly (as our starting point) we - need to tell the debugger to: - - 1). Add the offset of the forwarding field. - - 2). Follow that pointer to get the real __Block_byref_x_VarName - struct to use (the real one may have been copied onto the heap). - - 3). Add the offset for the field VarName, to find the actual variable. - - If we started with a pointer to the struct, then we need to - dereference that pointer first, before the other steps. - Translating this into DWARF ops, we will need to append the following - to the current location description for the variable: - - DW_OP_deref -- optional, if we start with a pointer - DW_OP_plus_uconst <forward_fld_offset> - DW_OP_deref - DW_OP_plus_uconst <varName_fld_offset> - - That is what this function does. */ - -/// addBlockByrefAddress - Start with the address based on the location -/// provided, and generate the DWARF information necessary to find the -/// actual Block variable (navigating the Block struct) based on the -/// starting location. Add the DWARF information to the die. For -/// more information, read large comment just above here. -/// -void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); - bool isPointer = false; - - StringRef varName = DV->getName(); - - if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); - isPointer = true; - } - - DICompositeType blockStruct = DICompositeType(TmpTy); - - // Find the __forwarding field and the variable field in the __Block_byref - // struct. - DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); - - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - StringRef fieldName = DT.getName(); - if (fieldName == "__forwarding") - forwardingField = Element; - else if (fieldName == varName) - varField = Element; - } - - // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; - - // Decode the original location, and use that as the start of the byref - // variable's location. 
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - // If we started with a pointer to the __Block_byref... struct, then - // the first thing we need to do is dereference the pointer (DW_OP_deref). - if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Next add the offset for the '__forwarding' field: - // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in - // adding the offset if it's 0. - if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); - } - - // Now dereference the __forwarding field to get to the real __Block_byref - // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Now that we've got the real __Block_byref... struct, add the offset - // for the variable's field to get to the location of the actual variable: - // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. - if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/// addAddress - Add an address attribute to a die based on the location -/// provided. -void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (RI->getFrameRegister(*Asm->MF) == Location.getReg() - && Location.getOffset()) { - // If variable offset is based in frame register then use fbreg. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); - addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - addBlock(Die, Attribute, 0, Block); - return; - } - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - addBlock(Die, Attribute, 0, Block); -} - -/// addRegisterAddress - Add register location entry in variable DIE. 
-bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) { - assert (MO.isReg() && "Invalid machine operand!"); - if (!MO.getReg()) - return false; - MachineLocation Location; - Location.set(MO.getReg()); - addAddress(Die, dwarf::DW_AT_location, Location); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned Imm = MO.getImm(); - addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantFPValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - APFloat FPImm = MO.getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); - - int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI, - bool Unsigned) { - if (CI->getBitWidth() <= 64) { - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - CI->getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - CI->getSExtValue()); - return true; - } - - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - // Get the raw data form of the large APInt. - const APInt Val = CI->getValue(); - const char *Ptr = (const char*)Val.getRawData(); - - int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & Ptr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addToContextOwner - Add Die into the list of its context owner's children. -void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (Context.isType()) { - DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); - ContextDIE->addChild(Die); - } else if (Context.isNameSpace()) { - DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); - ContextDIE->addChild(Die); - } else if (Context.isSubprogram()) { - DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context)); - ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) - ContextDIE->addChild(Die); - else - getCompileUnit(Context)->addDie(Die); -} - -/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the -/// given DIType. 
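The Start/Stop/Incr walk shared by addConstantValue and addConstantFPValue above is easiest to see in isolation. A standalone sketch, assuming a little-endian host so the in-memory bytes match what the raw APInt words would expose; only the emission order changes with the target:

#include <cstdint>
#include <cstdio>
#include <cstring>

static void emitConstBytes(uint64_t Raw, int NumBytes, bool LittleEndian) {
  unsigned char Buf[sizeof(Raw)];
  std::memcpy(Buf, &Raw, sizeof(Raw));  // host byte order
  int Incr  = LittleEndian ? 1 : -1;
  int Start = LittleEndian ? 0 : NumBytes - 1;
  int Stop  = LittleEndian ? NumBytes : -1;
  // Walk forward for little-endian targets, backward otherwise.
  for (; Start != Stop; Start += Incr)
    std::printf(".byte 0x%02x\n", 0xFF & Buf[Start]);
}

int main() {
  emitConstBytes(0x1122334455667788ULL, 8, /*LittleEndian=*/false);
  return 0;
}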
-DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { - CompileUnit *TypeCU = getCompileUnit(Ty); - DIE *TyDIE = TypeCU->getDIE(Ty); - if (TyDIE) - return TyDIE; - - // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - TypeCU->insertDIE(Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(*TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) - constructTypeDIE(*TyDIE, DICompositeType(Ty)); - else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(*TyDIE, DIDerivedType(Ty)); - } - - addToContextOwner(TyDIE, Ty.getContext()); - return TyDIE; -} - -/// addType - Add a new type attribute to the specified entity. -void DwarfDebug::addType(DIE *Entity, DIType Ty) { - if (!Ty.Verify()) - return; - - // Check for pre-existence. - CompileUnit *TypeCU = getCompileUnit(Ty); - DIEEntry *Entry = TypeCU->getDIEEntry(Ty); - // If it exists then use the existing value. - if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); - return; - } - - // Construct type. - DIE *Buffer = getOrCreateTypeDIE(Ty); - - // Set up proxy. - Entry = createDIEEntry(Buffer); - TypeCU->insertDIEEntry(Ty, Entry); - - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); -} - -/// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { - // Get core information. - StringRef Name = BTy.getName(); - Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); -} - -/// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { - // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates. - if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); - - // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - - // Add source line info if available and TyDesc is not a forward declaration. - if (!DTy.isForwardDecl()) - addSourceLine(&Buffer, DTy); -} - -/// constructTypeDIE - Construct type DIE from DICompositeType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { - // Get core information. - StringRef Name = CTy.getName(); - - uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); - - switch (Tag) { - case dwarf::DW_TAG_vector_type: - case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); - break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. 
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - } - break; - case dwarf::DW_TAG_subroutine_type: { - // Add return type. - DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); - - bool isPrototyped = true; - // Add arguments. - for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Elements.getElement(i); - if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); - isPrototyped = false; - } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - addType(Arg, DIType(Ty)); - Buffer.addChild(Arg); - } - } - // Add prototype flag. - if (isPrototyped) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); - } - break; - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_class_type: { - // Add elements to structure type. - DIArray Elements = CTy.getTypeArray(); - - // A forward struct declared type may not have elements available. - unsigned N = Elements.getNumElements(); - if (N == 0) - break; - - // Add elements to structure type. - for (unsigned i = 0; i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIE *ElemDie = NULL; - if (Element.isSubprogram()) { - DISubprogram SP(Element); - ElemDie = createSubprogramDIE(DISubprogram(Element)); - if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); - } - else if (Element.isVariable()) { - DIVariable DV(Element); - ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); - addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else - continue; - Buffer.addChild(ElemDie); - } - - if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } - - if (Tag == dwarf::DW_TAG_class_type) { - DIArray TParams = CTy.getTemplateParams(); - unsigned N = TParams.getNumElements(); - // Add template parameters. 
- for (unsigned i = 0; i < N; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); - else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); - } - } - break; - } - default: - break; - } - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type - || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - else { - // Add zero size if it is not a forward declaration. - if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - else - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); - } - - // Add source line info if available. - if (!CTy.isForwardDecl()) - addSourceLine(&Buffer, CTy); - } -} - -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - CompileUnit *TypeCU = getCompileUnit(TP); - DIE *ParamDIE = TypeCU->getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); - return ParamDIE; -} - -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. -DIE * -DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { - CompileUnit *TVCU = getCompileUnit(TPV); - DIE *ParamDIE = TVCU->getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); - addType(ParamDIE, TPV.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - TPV.getValue()); - return ParamDIE; -} - -/// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - - Buffer.addChild(DW_Subrange); -} - -/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); - - // Emit derived type. - addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); - - // Get an anonymous type for index type. - CompileUnit *TheCU = getCompileUnit(*CTy); - DIE *IdxTy = TheCU->getIndexTyDie(); - if (!IdxTy) { - // Construct an anonymous type for index type. 
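constructSubrangeDIE above records an array dimension as a lower/upper bound pair, omitting DW_AT_lower_bound when it is 0, the C/C++ default. A consumer-side sketch of how the element count falls out:

#include <cstdint>

// For 'int a[4]' the subrange DIE carries Lo = 0 (implicit) and Hi = 3,
// so the count is Hi - Lo + 1 = 4.
static uint64_t subrangeElementCount(int64_t Lo, int64_t Hi) {
  return static_cast<uint64_t>(Hi - Lo + 1);
}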
- IdxTy = new DIE(dwarf::DW_TAG_base_type); - addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); - addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_signed); - TheCU->addDie(IdxTy); - TheCU->setIndexTyDie(IdxTy); - } - - // Add subranges to array type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); - } -} - -/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) { - DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - int64_t Value = ETy.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); - return Enumerator; -} - /// getRealLinkageName - If special LLVM prefix that is used to inform the asm /// printer to not emit usual symbol prefix before the symbol name is used then /// return linkage name after skipping this special LLVM prefix. @@ -1295,84 +314,6 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } -/// createMemberDIE - Create new member DIE. -DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); - StringRef Name = DT.getName(); - if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - addType(MemberDie, DT.getTypeDerivedFrom()); - - addSourceLine(MemberDie, DT); - - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getTargetData().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { - - // For C++, virtual base classes are not at fixed offset. Use following - // expression to extract appropriate offset from vtable. 
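Two pieces of createMemberDIE reward a worked example: the bitfield arithmetic above, traced for a concrete struct, and the virtual-base expression that follows, shown as a DWARF stack walkthrough. Both are illustrations only; all sizes are in bits unless noted.

// Bitfield arithmetic for
//   struct S { unsigned a : 3; unsigned b : 5; };
// and member 'b' on a little-endian target:
//   Size        = 5                       width of 'b'
//   FieldSize   = 32                      storage unit (unsigned)
//   AlignMask   = ~(32 - 1)
//   HiMark      = (3 + 32) & AlignMask    = 32
//   FieldOffset = HiMark - FieldSize      = 0   (unit starts at byte 0)
//   Offset      = 3 - FieldOffset         = 3   (from the unit's LSB)
//   Offset      = 32 - (3 + 5)            = 24  (DWARF counts from the MSB)
// yielding DW_AT_byte_size 4, DW_AT_bit_size 5, DW_AT_bit_offset 24, and
// DW_AT_data_member_location 0 (FieldOffset >> 3).
//
// Virtual-base expression, stack state after each opcode:
//   start                 [ObAddr]
//   DW_OP_dup             [ObAddr, ObAddr]
//   DW_OP_deref           [ObAddr, VTbl]            VTbl = *ObAddr
//   DW_OP_constu Offset   [ObAddr, VTbl, Offset]
//   DW_OP_minus           [ObAddr, VTbl - Offset]
//   DW_OP_deref           [ObAddr, Delta]           Delta = *(VTbl - Offset)
//   DW_OP_plus            [ObAddr + Delta]          = BaseAddr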
- // BaseAddr = ObAddr + *((*ObAddr) - Offset) - - DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); - - if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - // Otherwise C++ member and base classes are considered public. - else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, - dwarf::DW_VIRTUALITY_virtual); - return MemberDie; -} - /// createSubprogramDIE - Create new DIE using SP. DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { CompileUnit *SPCU = getCompileUnit(SP); @@ -1381,19 +322,35 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { return SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); + + // DW_TAG_inlined_subroutine may refer to this DIE. + SPCU->insertDIE(SP, SPDie); + + // Add to context owner. + SPCU->addToContextOwner(SPDie, SP.getContext()); + + // Add function template parameters. + SPCU->addTemplateParams(*SPDie, SP.getTemplateParams()); + + // If this DIE is going to refer declaration info using AT_specification + // then there is no need to add other attributes. + if (SP.getFunctionDeclaration().isSubprogram()) + return SPDie; + // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + SP.getName()); StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); - addSourceLine(SPDie, SP); + SPCU->addSourceLine(SPDie, SP); if (SP.isPrototyped()) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. 
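The early return on SP.getFunctionDeclaration() above leans on DWARF's specification mechanism: an out-of-line definition only needs a DIE whose DW_AT_specification points at the declaration's DIE (updateSubprogramScopeDIE below adds exactly that reference), so repeating name and type attributes here would be redundant. In source terms:

// struct A { void f(); };   // declaration: gets the full DW_TAG_subprogram DIE
// void A::f() {}            // definition: a DW_TAG_subprogram whose
//                           // DW_AT_specification references the DIE above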
DICompositeType SPTy = SP.getType(); @@ -1401,24 +358,24 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { unsigned SPTag = SPTy.getTag(); if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - addType(SPDie, SPTy); + SPCU->addType(SPDie, SPTy); else - addType(SPDie, DIType(Args.getElement(0))); + SPCU->addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = SPCU->getDIEBlock(); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); ContainingTypeMap.insert(std::make_pair(SPDie, SP.getContainingType())); } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. DICompositeType SPTy = SP.getType(); @@ -1429,32 +386,26 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); + SPCU->addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); if (unsigned isa = Asm->getISAEncoding()) { - addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); } - // DW_TAG_inlined_subroutine may refer to this DIE. - SPCU->insertDIE(SP, SPDie); - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); - return SPDie; } @@ -1509,51 +460,57 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); - // There is not any need to generate specification DIE for a function - // defined at compile unit level. If a function is defined inside another - // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating - // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - - // Add arguments. 
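The mechanical change running through this hunk is that DIE-construction helpers now live on CompileUnit and are reached through the owning unit (SPCU->addUInt and friends) rather than through DwarfDebug. A shape-only sketch; the names below are illustrative stand-ins, not LLVM's actual declarations:

#include <cstdint>

class DIE;

// Each unit owns its DIEs and the helpers that attach attributes to them,
// so a caller holding the right unit writes CU->addUInt(...) instead of
// calling a free-floating DwarfDebug helper.
class CompileUnitSketch {
public:
  void addUInt(DIE *D, unsigned Attribute, unsigned Form, uint64_t Value);
  void addString(DIE *D, unsigned Attribute, unsigned Form, const char *S);
  void addSourceLine(DIE *D /*, debug-info descriptor */);
};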
- DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); - if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - SPDie->addChild(Arg); - } - DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); - SPCU->addDie(SPDie); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Refer function declaration directly. + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + createSubprogramDIE(SPDecl)); + else { + // There is not any need to generate specification DIE for a function + // defined at compile unit level. If a function is defined inside another + // function then gdb prefers the definition at top level and but does not + // expect specification DIE in parent function. So avoid creating + // specification DIE for a function defined inside a function. + if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(DIType(Args.getElement(i))); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + SPCU->addDie(SPDie); + } } - // Pick up abstract subprogram DIE. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsSPDIE); SPCU->addDie(SPDie); } - addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); - addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); return SPDie; } @@ -1570,13 +527,14 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { if (Ranges.empty()) return 0; + CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode()); SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. 
Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -1595,8 +553,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); return ScopeDIE; } @@ -1636,11 +594,11 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { CompileUnit *TheCU = getCompileUnit(InlinedSP); DIE *OriginDIE = TheCU->getDIE(InlinedSP); assert(OriginDIE && "Unable to find Origin DIE!"); - addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, OriginDIE); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); InlinedSubprogramDIEs.insert(OriginDIE); @@ -1656,8 +614,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); - addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); return ScopeDIE; } @@ -1686,7 +644,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Define variable debug information entry. DIE *VariableDie = new DIE(Tag); - + CompileUnit *VariableCU = getCompileUnit(DV->getVariable()); DIE *AbsDIE = NULL; DenseMap<const DbgVariable *, const DbgVariable *>::iterator V2AVI = VarToAbstractVarMap.find(DV); @@ -1694,20 +652,23 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { AbsDIE = V2AVI->second->getDIE(); if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - addSourceLine(VariableDie, DV->getVariable()); + VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + Name); + VariableCU->addSourceLine(VariableDie, DV->getVariable()); // Add variable type. 
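The DW_AT_ranges value computed earlier in constructLexicalScopeDIE is a byte offset into .debug_ranges: every queued symbol becomes one pointer-sized address, so the next scope's list starts at DebugRangeSymbols.size() times the pointer size.

// For example, on x86-64 (8-byte pointers), with 6 addresses already
// queued, a new scope's range list begins at byte offset 6 * 8 = 48
// in .debug_ranges.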
- addType(VariableDie, DV->getType()); + VariableCU->addType(VariableDie, DV->getType()); } if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); else if (DIVariable(DV->getVariable()).isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); if (Scope->isAbstractScope()) { DV->setDIE(VariableDie); @@ -1718,7 +679,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { unsigned Offset = DV->getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, Asm->GetTempSymbol("debug_loc", Offset)); DV->setDIE(VariableDie); UseDotDebugLocEntry.insert(VariableDie); @@ -1738,22 +699,30 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (DVInsn->getOperand(1).isImm() && TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); - updated = true; - } else - updated = addRegisterAddress(VariableDie, RegOp); + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, + DVInsn->getOperand(1).getImm(), + FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + + } else if (RegOp.getReg()) + VariableCU->addVariableAddress(DV, VariableDie, + MachineLocation(RegOp.getReg())); + updated = true; } else if (DVInsn->getOperand(0).isImm()) - updated = addConstantValue(VariableDie, DVInsn->getOperand(0)); + updated = VariableCU->addConstantValue(VariableDie, + DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isFPImm()) updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0)); } else { - MachineLocation Location = Asm->getDebugValueLocation(DVInsn); - if (Location.getReg()) { - addAddress(VariableDie, dwarf::DW_AT_location, Location); - updated = true; - } + VariableCU->addVariableAddress(DV, VariableDie, + Asm->getDebugValueLocation(DVInsn)); + updated = true; } if (!updated) { // If variableDie is not updated then DBG_VALUE instruction does not @@ -1767,15 +736,21 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // .. else use frame index, if available. 
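The rewritten operand handling above distinguishes two register-based DBG_VALUE shapes. A condensed trace of both paths, matching the three-operand form {reg, imm, metadata} that isDbgValueInDefinedReg later checks for:

// DBG_VALUE <frame-reg>, <imm>, !var
//   -> the imm is taken as a frame index and resolved through
//      TFI->getFrameIndexReference(MF, imm, FrameReg)
//   -> addVariableAddress(DV, Die, MachineLocation(FrameReg, Offset))
//
// DBG_VALUE <other-reg>, 0, !var
//   -> the variable lives in the register itself
//   -> addVariableAddress(DV, Die, MachineLocation(Reg))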
int FI = 0; - if (findVariableFrameIndex(DV, &FI)) - addVariableAddress(DV, VariableDie, FI); - + if (findVariableFrameIndex(DV, &FI)) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + } + DV->setDIE(VariableDie); return VariableDie; } -void DwarfDebug::addPubTypes(DISubprogram SP) { +void CompileUnit::addPubTypes(DISubprogram SP) { DICompositeType SPTy = SP.getType(); unsigned SPTag = SPTy.getTag(); if (SPTag != dwarf::DW_TAG_subroutine_type) @@ -1789,9 +764,8 @@ void DwarfDebug::addPubTypes(DISubprogram SP) { DICompositeType CATy = getDICompositeType(ATy); if (DIDescriptor(CATy).Verify() && !CATy.getName().empty() && !CATy.isForwardDecl()) { - CompileUnit *TheCU = getCompileUnit(CATy); - if (DIEEntry *Entry = TheCU->getDIEEntry(CATy)) - TheCU->addGlobalType(CATy.getName(), Entry->getEntry()); + if (DIEEntry *Entry = getDIEEntry(CATy)) + addGlobalType(CATy.getName(), Entry->getEntry()); } } } @@ -1802,6 +776,14 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { return NULL; SmallVector <DIE *, 8> Children; + + // Collect arguments for current function. + if (Scope == CurrentFnDbgScope) + for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) + if (DbgVariable *ArgDV = CurrentFnArguments[i]) + if (DIE *Arg = constructVariableDIE(ArgDV, Scope)) + Children.push_back(Arg); + // Collect lexical scope childrens first. const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); for (unsigned i = 0, N = Variables.size(); i < N; ++i) @@ -1841,7 +823,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - addPubTypes(DISubprogram(DS)); + getCompileUnit(DS)->addPubTypes(DISubprogram(DS)); return ScopeDIE; } @@ -1851,10 +833,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){ +unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, + StringRef DirName) { // If FE did not provide a file name, then assume stdin. if (FileName.empty()) - return GetOrCreateSourceID("<stdin>"); + return GetOrCreateSourceID("<stdin>", StringRef()); + + // MCStream expects full path name as filename. + if (!DirName.empty() && !FileName.startswith("/")) { + std::string FullPathName(DirName.data()); + if (!DirName.endswith("/")) + FullPathName += "/"; + FullPathName += FileName.data(); + // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. + return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); + } StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); if (Entry.getValue()) @@ -1864,19 +857,18 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){ Entry.setValue(SrcId); // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); return SrcId; } /// getOrCreateNameSpace - Create a DIE for DINameSpace. 
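The directory handling added to GetOrCreateSourceID above is simple but easy to get wrong at the edges. A standalone sketch, with std::string standing in for StringRef:

#include <string>

static std::string sourceFileName(const std::string &Dir,
                                  const std::string &File) {
  if (File.empty())
    return "<stdin>";                   // front end gave no file name
  if (Dir.empty() || File[0] == '/')
    return File;                        // no directory, or already absolute
  std::string Full = Dir;
  if (Full[Full.size() - 1] != '/')
    Full += '/';
  return Full + File;                   // MCStreamer expects the full path
}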
-DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { - CompileUnit *TheCU = getCompileUnit(NS); - DIE *NDie = TheCU->getDIE(NS); +DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { + DIE *NDie = getDIE(NS); if (NDie) return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); - TheCU->insertDIE(NS, NDie); + insertDIE(NS, NDie); if (!NS.getName().empty()) addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); addSourceLine(NDie, NS); @@ -1890,40 +882,40 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN); + unsigned ID = GetOrCreateSourceID(FN, Dir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); - addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); - addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); + CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); + NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, + DIUnit.getProducer()); + NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + DIUnit.getLanguage()); + NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This // simplifies debug range entries. - addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList()) - addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("section_line")); + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("section_line")); else - addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) - addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) - addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); - + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); + unsigned RVer = DIUnit.getRunTimeVersion(); if (RVer) - addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); - CompileUnit *NewCU = new CompileUnit(ID, Die); if (!FirstCU) FirstCU = NewCU; CUMap.insert(std::make_pair(N, NewCU)); @@ -2019,14 +1011,15 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { bool isGlobalVariable = GV.getGlobal() != NULL; // Add name. 
- addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); // Add type. - addType(VariableDIE, GTy); + TheCU->addType(VariableDIE, GTy); if (GTy.isCompositeType() && !GTy.getName().empty() && !GTy.isForwardDecl()) { DIEEntry *Entry = TheCU->getDIEEntry(GTy); @@ -2035,22 +1028,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { } // Add scoping info. if (!GV.isLocalToUnit()) { - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // Expose as global. TheCU->addGlobal(GV.getName(), VariableDIE); } // Add line number info. - addSourceLine(VariableDIE, GV); + TheCU->addSourceLine(VariableDIE, GV); // Add to map. TheCU->insertDIE(N, VariableDIE); // Add to context owner. DIDescriptor GVContext = GV.getContext(); - addToContextOwner(VariableDIE, GVContext); + TheCU->addToContextOwner(VariableDIE, GVContext); // Add location. if (isGlobalVariable) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, Asm->Mang->getSymbol(GV.getGlobal())); // Do not create specification DIE if context is either compile unit // or a subprogram. @@ -2058,28 +1051,28 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, + TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); TheCU->addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } } else if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) - addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); + TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { // GV is a merged global. 
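For the merged-global case handled just below, the emitted expression evaluates, in DWARF stack terms, to the slot's address inside the merged container:

// DW_OP_addr  Sym       [&MergedContainer]
// DW_OP_constu Off      [&MergedContainer, Off]
// DW_OP_plus            [&MergedContainer + Off]    address of GV's slot
// where Sym is the container global from operand 0 of the getelementptr
// constant expression and Off is the constant taken from its operand 2.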
DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } return; @@ -2105,7 +1098,7 @@ void DwarfDebug::constructSubprogramDIE(const MDNode *N) { TheCU->insertDIE(N, SubprogramDie); // Add to context owner. - addToContextOwner(SubprogramDie, SP.getContext()); + TheCU->addToContextOwner(SubprogramDie, SP.getContext()); // Expose as global. TheCU->addGlobal(SP.getName(), SubprogramDie); @@ -2160,12 +1153,16 @@ void DwarfDebug::beginModule(Module *M) { //getOrCreateTypeDIE if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); @@ -2216,7 +1213,7 @@ void DwarfDebug::endModule() { for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), @@ -2226,7 +1223,8 @@ void DwarfDebug::endModule() { if (!N) continue; DIE *NDie = getCompileUnit(N)->getDIE(N); if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type, + dwarf::DW_FORM_ref4, NDie); } // Standard sections final addresses. @@ -2309,6 +1307,30 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, return AbsDbgVariable; } +/// addCurrentFnArgument - If Var is an current function argument that add +/// it in CurrentFnArguments list. 
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, + DbgVariable *Var, DbgScope *Scope) { + if (Scope != CurrentFnDbgScope) + return false; + DIVariable DV = Var->getVariable(); + if (DV.getTag() != dwarf::DW_TAG_arg_variable) + return false; + unsigned ArgNo = DV.getArgNumber(); + if (ArgNo == 0) + return false; + + size_t Size = CurrentFnArguments.size(); + if (Size == 0) + CurrentFnArguments.resize(MF->getFunction()->arg_size()); + // llvm::Function argument size is not good indicator of how many + // arguments does the function have at source level. + if (ArgNo > Size) + CurrentFnArguments.resize(ArgNo * 2); + CurrentFnArguments[ArgNo - 1] = Var; + return true; +} + /// collectVariableInfoFromMMITable - Collect variable information from /// side table maintained by MMI. void @@ -2337,7 +1359,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); DbgVariable *RegVar = new DbgVariable(DV); recordVariableFrameIndex(RegVar, VP.first); - Scope->addVariable(RegVar); + if (!addCurrentFnArgument(MF, RegVar, Scope)) + Scope->addVariable(RegVar); if (AbsDbgVariable) { recordVariableFrameIndex(AbsDbgVariable, VP.first); VarToAbstractVarMap[RegVar] = AbsDbgVariable; @@ -2349,9 +1372,9 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) - return true; - return false; + return MI->getNumOperands() == 3 && + MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && + MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; } /// collectVariableInfo - Populate DbgScope entries with variables' info. @@ -2362,41 +1385,21 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, /// collection info from MMI table. collectVariableInfoFromMMITable(MF, Processed); - SmallVector<const MachineInstr *, 8> DbgValues; - // Collect variable information from DBG_VALUE machine instructions; - for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - if (!MInsn->isDebugValue()) - continue; - DbgValues.push_back(MInsn); - } - - // This is a collection of DBV_VALUE instructions describing same variable. 
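A condensed, self-contained sketch of the slot bookkeeping in addCurrentFnArgument above, with std::vector standing in for the member container; the growth policy is the point:

#include <cstddef>
#include <vector>

struct DbgVar; // stand-in for the real variable record

static void recordArgument(std::vector<DbgVar*> &Slots, unsigned ArgNo,
                           DbgVar *Var, size_t IRArgCount) {
  if (ArgNo == 0)
    return;                       // not a numbered formal argument
  if (Slots.empty())
    Slots.resize(IRArgCount);     // first guess: the IR signature's count
  if (ArgNo > Slots.size())
    Slots.resize(ArgNo * 2);      // debug info can report more arguments
  Slots[ArgNo - 1] = Var;         // slot N-1 holds source argument N
}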
- SmallVector<const MachineInstr *, 4> MultipleValues; - for(SmallVector<const MachineInstr *, 8>::iterator I = DbgValues.begin(), - E = DbgValues.end(); I != E; ++I) { - const MachineInstr *MInsn = *I; - MultipleValues.clear(); - if (isDbgValueInDefinedReg(MInsn)) - MultipleValues.push_back(MInsn); - DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata()); - if (Processed.count(DV) != 0) + for (SmallVectorImpl<const MDNode*>::const_iterator + UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE; + ++UVI) { + const MDNode *Var = *UVI; + if (Processed.count(Var)) continue; - const MachineInstr *PrevMI = MInsn; - for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1, - ME = DbgValues.end(); MI != ME; ++MI) { - const MDNode *Var = - (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && - !PrevMI->isIdenticalTo(*MI)) - MultipleValues.push_back(*MI); - PrevMI = *MI; - } + // History contains relevant DBG_VALUE instructions for Var and instructions + // clobbering it. + SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + if (History.empty()) + continue; + const MachineInstr *MInsn = History.front(); + DIVariable DV(Var); DbgScope *Scope = NULL; if (DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(DV.getContext()).describes(MF->getFunction())) @@ -2408,32 +1411,29 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, continue; Processed.insert(DV); + assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *RegVar = new DbgVariable(DV); - Scope->addVariable(RegVar); + if (!addCurrentFnArgument(MF, RegVar, Scope)) + Scope->addVariable(RegVar); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; VarToAbstractVarMap[RegVar] = AbsVar; } - if (MultipleValues.size() <= 1) { + + // Simple ranges that are fully coalesced. + if (History.size() <= 1 || (History.size() == 2 && + MInsn->isIdenticalTo(History.back()))) { DbgVariableToDbgInstMap[RegVar] = MInsn; continue; } // handle multiple DBG_VALUE instructions describing one variable. - if (DotDebugLocEntries.empty()) - RegVar->setDotDebugLocOffset(0); - else - RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); - const MachineInstr *Begin = NULL; - const MachineInstr *End = NULL; - for (SmallVector<const MachineInstr *, 4>::iterator - MVI = MultipleValues.begin(), MVE = MultipleValues.end(); - MVI != MVE; ++MVI) { - if (!Begin) { - Begin = *MVI; - continue; - } - End = *MVI; + RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); + + for (SmallVectorImpl<const MachineInstr*>::const_iterator + HI = History.begin(), HE = History.end(); HI != HE; ++HI) { + const MachineInstr *Begin = *HI; + assert(Begin->isDebugValue() && "Invalid History entry"); MachineLocation MLoc; if (Begin->getNumOperands() == 3) { if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) @@ -2441,25 +1441,32 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } else MLoc = Asm->getDebugValueLocation(Begin); + // FIXME: emitDebugLoc only understands registers. + if (!MLoc.getReg()) + continue; + + // Compute the range for a register location. const MCSymbol *FLabel = getLabelBeforeInsn(Begin); - const MCSymbol *SLabel = getLabelBeforeInsn(End); - if (MLoc.getReg()) - DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc)); - - Begin = End; - if (MVI + 1 == MVE) { - // If End is the last instruction then its value is valid - // until the end of the funtion. 
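The replacement logic in this hunk derives one location range per history entry. The endpoint rule, in compressed form:

// A DBG_VALUE's location holds from the label before it until, in order
// of preference:
//   - the label before the next DBG_VALUE in the history,
//   - the label *after* a plain instruction that clobbers it, or
//   - FunctionEndSym when it is the last entry.
//
//   History:  DBG_VALUE@A    DBG_VALUE@B    clobber@C
//   Ranges:   [before A, before B)   [before B, after C)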
- MachineLocation EMLoc; - if (End->getNumOperands() == 3) { - if (End->getOperand(0).isReg() && Begin->getOperand(1).isImm()) - EMLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm()); - } else - EMLoc = Asm->getDebugValueLocation(End); - if (EMLoc.getReg()) - DotDebugLocEntries. - push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc)); + const MCSymbol *SLabel = 0; + + if (HI + 1 == HE) + // If Begin is the last instruction in History then its value is valid + // until the end of the function. + SLabel = FunctionEndSym; + else { + const MachineInstr *End = HI[1]; + if (End->isDebugValue()) + SLabel = getLabelBeforeInsn(End); + else { + // End is a normal instruction clobbering the range. + SLabel = getLabelAfterInsn(End); + assert(SLabel && "Forgot label after clobber instruction"); + ++HI; + } } + + // The value is valid until the next DBG_VALUE or clobber. + DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -2480,66 +1487,74 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, /// getLabelBeforeInsn - Return Label preceding the instruction. const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { - DenseMap<const MachineInstr *, MCSymbol *>::iterator I = - LabelsBeforeInsn.find(MI); - if (I == LabelsBeforeInsn.end()) - // FunctionBeginSym always preceeds all the instruction in current function. - return FunctionBeginSym; - return I->second; + MCSymbol *Label = LabelsBeforeInsn.lookup(MI); + assert(Label && "Didn't insert label before instruction"); + return Label; } /// getLabelAfterInsn - Return Label immediately following the instruction. const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { - DenseMap<const MachineInstr *, MCSymbol *>::iterator I = - LabelsAfterInsn.find(MI); - if (I == LabelsAfterInsn.end()) - return NULL; - return I->second; + return LabelsAfterInsn.lookup(MI); } /// beginInstruction - Process beginning of an instruction. void DwarfDebug::beginInstruction(const MachineInstr *MI) { - if (InsnNeedsLabel.count(MI) == 0) { - LabelsBeforeInsn[MI] = PrevLabel; - return; + // Check if source location changes, but ignore DBG_VALUE locations. + if (!MI->isDebugValue()) { + DebugLoc DL = MI->getDebugLoc(); + if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { + PrevInstLoc = DL; + if (!DL.isUnknown()) { + const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); + recordSourceLine(DL.getLine(), DL.getCol(), Scope); + } else + recordSourceLine(0, 0, 0); + } } - // Check location. - DebugLoc DL = MI->getDebugLoc(); - if (!DL.isUnknown()) { - const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); - PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; - LabelsBeforeInsn[MI] = PrevLabel; - return; - } + // Insert labels where requested. + DenseMap<const MachineInstr*, MCSymbol*>::iterator I = + LabelsBeforeInsn.find(MI); - // If location is unknown then use temp label for this DBG_VALUE - // instruction. - if (MI->isDebugValue()) { - PrevLabel = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(PrevLabel); - LabelsBeforeInsn[MI] = PrevLabel; + // No label needed. + if (I == LabelsBeforeInsn.end()) return; - } - if (UnknownLocations) { - PrevLabel = recordSourceLine(0, 0, 0); - LabelsBeforeInsn[MI] = PrevLabel; + // Label already assigned. 
+ if (I->second) return; - } - assert (0 && "Instruction is not processed!"); + if (!PrevLabel) { + PrevLabel = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(PrevLabel); + } + I->second = PrevLabel; } /// endInstruction - Process end of an instruction. void DwarfDebug::endInstruction(const MachineInstr *MI) { - if (InsnsEndScopeSet.count(MI) != 0) { - // Emit a label if this instruction ends a scope. - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - LabelsAfterInsn[MI] = Label; + // Don't create a new label after DBG_VALUE instructions. + // They don't generate code. + if (!MI->isDebugValue()) + PrevLabel = 0; + + DenseMap<const MachineInstr*, MCSymbol*>::iterator I = + LabelsAfterInsn.find(MI); + + // No label needed. + if (I == LabelsAfterInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + // We need a label after this instruction. + if (!PrevLabel) { + PrevLabel = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(PrevLabel); } + I->second = PrevLabel; } /// getOrCreateDbgScope - Create DbgScope for the scope. @@ -2799,7 +1814,8 @@ void DwarfDebug::identifyScopeMarkers() { RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "DbgRange does not have first instruction!"); assert(RI->second && "DbgRange does not have second instruction!"); - InsnsEndScopeSet.insert(RI->second); + requestLabelBeforeInsn(RI->first); + requestLabelAfterInsn(RI->second); } } } @@ -2877,45 +1893,145 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { recordSourceLine(Line, Col, TheScope); + assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + /// ProcessedArgs - Collection of arguments already processed. SmallPtrSet<const MDNode *, 8> ProcessedArgs; - DebugLoc PrevLoc; + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + + /// LiveUserVar - Map physreg numbers to the MDNode they contain. + std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) + I != E; ++I) { + bool AtBlockEntry = true; for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MI = II; - DebugLoc DL = MI->getDebugLoc(); + if (MI->isDebugValue()) { assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); - DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); - if (!DV.Verify()) continue; - // If DBG_VALUE is for a local variable then it needs a label. - if (DV.getTag() != dwarf::DW_TAG_arg_variable) - InsnNeedsLabel.insert(MI); - // DBG_VALUE for inlined functions argument needs a label. - else if (!DISubprogram(getDISubprogram(DV.getContext())). - describes(MF->getFunction())) - InsnNeedsLabel.insert(MI); - // DBG_VALUE indicating argument location change needs a label. - else if (!ProcessedArgs.insert(DV)) - InsnNeedsLabel.insert(MI); + + // Keep track of user variables. + const MDNode *Var = + MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + + // Variable is in a register, we need to check for clobbers. + if (isDbgValueInDefinedReg(MI)) + LiveUserVar[MI->getOperand(0).getReg()] = Var; + + // Check the history of this variable. + SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + if (History.empty()) { + UserVariables.push_back(Var); + // The first mention of a function argument gets the FunctionBeginSym + // label, so arguments are visible when breaking at function entry. 
+ DIVariable DV(Var); + if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && + DISubprogram(getDISubprogram(DV.getContext())) + .describes(MF->getFunction())) + LabelsBeforeInsn[MI] = FunctionBeginSym; + } else { + // We have seen this variable before. Try to coalesce DBG_VALUEs. + const MachineInstr *Prev = History.back(); + if (Prev->isDebugValue()) { + // Coalesce identical entries at the end of History. + if (History.size() >= 2 && + Prev->isIdenticalTo(History[History.size() - 2])) + History.pop_back(); + + // Terminate old register assignments that don't reach MI; + MachineFunction::const_iterator PrevMBB = Prev->getParent(); + if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) && + isDbgValueInDefinedReg(Prev)) { + // Previous register assignment needs to terminate at the end of + // its basic block. + MachineBasicBlock::const_iterator LastMI = + PrevMBB->getLastNonDebugInstr(); + if (LastMI == PrevMBB->end()) + // Drop DBG_VALUE for empty range. + History.pop_back(); + else { + // Terminate after LastMI. + History.push_back(LastMI); + } + } + } + } + History.push_back(MI); } else { - // If location is unknown then instruction needs a location only if - // UnknownLocations flag is set. - if (DL.isUnknown()) { - if (UnknownLocations && !PrevLoc.isUnknown()) - InsnNeedsLabel.insert(MI); - } else if (DL != PrevLoc) - // Otherwise, instruction needs a location only if it is new location. - InsnNeedsLabel.insert(MI); + // Not a DBG_VALUE instruction. + if (!MI->isLabel()) + AtBlockEntry = false; + + // Check if the instruction clobbers any registers with debug vars. + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) + continue; + for (const unsigned *AI = TRI->getOverlaps(MOI->getReg()); + unsigned Reg = *AI; ++AI) { + const MDNode *Var = LiveUserVar[Reg]; + if (!Var) + continue; + // Reg is now clobbered. + LiveUserVar[Reg] = 0; + + // Was MD last defined by a DBG_VALUE referring to Reg? + DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); + if (HistI == DbgValues.end()) + continue; + SmallVectorImpl<const MachineInstr*> &History = HistI->second; + if (History.empty()) + continue; + const MachineInstr *Prev = History.back(); + // Sanity-check: Register assignments are terminated at the end of + // their block. + if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent()) + continue; + // Is the variable still in Reg? + if (!isDbgValueInDefinedReg(Prev) || + Prev->getOperand(0).getReg() != Reg) + continue; + // Var is clobbered. Make sure the next instruction gets a label. + History.push_back(MI); + } + } } + } + } + + for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); + I != E; ++I) { + SmallVectorImpl<const MachineInstr*> &History = I->second; + if (History.empty()) + continue; - if (!DL.isUnknown() || UnknownLocations) - PrevLoc = DL; + // Make sure the final register assignments are terminated. + const MachineInstr *Prev = History.back(); + if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { + const MachineBasicBlock *PrevMBB = Prev->getParent(); + MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr(); + if (LastMI == PrevMBB->end()) + // Drop DBG_VALUE for empty range. + History.pop_back(); + else { + // Terminate after LastMI. + History.push_back(LastMI); + } } + // Request labels for the full history. 
+ for (unsigned i = 0, e = History.size(); i != e; ++i) { + const MachineInstr *MI = History[i]; + if (MI->isDebugValue()) + requestLabelBeforeInsn(MI); + else + requestLabelAfterInsn(MI); + } + } + PrevInstLoc = DebugLoc(); PrevLabel = FunctionBeginSym; } @@ -2963,8 +2079,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); if (!DisableFramePointerElim(*MF)) - addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE, + dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), @@ -2973,12 +2090,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; - InsnNeedsLabel.clear(); + DeleteContainerPointers(CurrentFnArguments); DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); DeleteContainerSeconds(DbgScopeMap); - InsnsEndScopeSet.clear(); + UserVariables.clear(); + DbgValues.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); @@ -3029,10 +2147,9 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. -MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, - const MDNode *S) { +void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S){ StringRef Fn; - + StringRef Dir; unsigned Src = 1; if (S) { DIDescriptor Scope(S); @@ -3040,27 +2157,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, if (Scope.isCompileUnit()) { DICompileUnit CU(S); Fn = CU.getFilename(); + Dir = CU.getDirectory(); } else if (Scope.isFile()) { DIFile F(S); Fn = F.getFilename(); + Dir = F.getDirectory(); } else if (Scope.isSubprogram()) { DISubprogram SP(S); Fn = SP.getFilename(); + Dir = SP.getDirectory(); } else if (Scope.isLexicalBlock()) { DILexicalBlock DB(S); Fn = DB.getFilename(); + Dir = DB.getDirectory(); } else assert(0 && "Unexpected scope info"); - Src = GetOrCreateSourceID(Fn); + Src = GetOrCreateSourceID(Fn, Dir); } - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT, - 0, 0); - - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - return Label; + 0, 0, Fn); } //===----------------------------------------------------------------------===// @@ -3118,17 +2234,15 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// void DwarfDebug::computeSizeAndOffsets() { - unsigned PrevOffset = 0; for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { // Compute size of compile unit header. - static unsigned Offset = PrevOffset + + unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) computeSizeAndOffset(I->second->getCUDie(), Offset, true); - PrevOffset = Offset; } } @@ -3289,8 +2403,7 @@ void DwarfDebug::emitDebugInfo() { unsigned ContentSize = Die->getSize() + sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. 
Section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int32_t); // FIXME - extra pad for gdb bug. + sizeof(int8_t); // Pointer Size (in bytes) Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); Asm->EmitInt32(ContentSize); @@ -3303,12 +2416,6 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitInt8(Asm->getTargetData().getPointerSize()); emitDIE(Die); - // FIXME - extra padding for gdb bug. - Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); } } @@ -3614,32 +2721,38 @@ void DwarfDebug::emitDebugLoc() { } else { Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false); - if (int Offset = Entry.Loc.getOffset()) { - // If the value is at a certain offset from frame register then - // use DW_OP_fbreg. - unsigned OffsetSize = Offset ? MCAsmInfo::getSLEB128Size(Offset) : 1; + DIVariable DV(Entry.Variable); + if (DV.hasComplexAddress()) { + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + OffsetSize); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_fbreg)); - Asm->EmitInt8(dwarf::DW_OP_fbreg); - Asm->OutStreamer.AddComment("Offset"); - Asm->EmitSLEB128(Offset); - } else { - if (Reg < 32) { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Loc) + N - 2); + Asm->EmitDwarfRegOp(Loc); +// Asm->EmitULEB128(DV.getAddrElement(1)); + i = 2; } else { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg)); - Asm->EmitInt8(dwarf::DW_OP_regx); - Asm->EmitULEB128(Reg); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Entry.Loc) + N); + Asm->EmitDwarfRegOp(Entry.Loc); + } + + // Emit remaining complex address elements. 
+ for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitULEB128(DV.getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) + Asm->EmitInt8(dwarf::DW_OP_deref); + else llvm_unreachable("unknown Opcode found in complex address"); } + } else { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Entry.Loc)); + Asm->EmitDwarfRegOp(Entry.Loc); } } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 7df0510..25f2675 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Analysis/DebugInfo.h" #include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -40,21 +41,6 @@ class DIE; class DIEBlock; class DIEEntry; -class DIEnumerator; -class DIDescriptor; -class DIVariable; -class DIGlobal; -class DIGlobalVariable; -class DISubprogram; -class DIBasicType; -class DIDerivedType; -class DIType; -class DINameSpace; -class DISubrange; -class DICompositeType; -class DITemplateTypeParameter; -class DITemplateValueParameter; - //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. /// @@ -80,10 +66,12 @@ typedef struct DotDebugLocEntry { const MCSymbol *Begin; const MCSymbol *End; MachineLocation Loc; + const MDNode *Variable; bool Merged; - DotDebugLocEntry() : Begin(0), End(0), Merged(false) {} - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) - : Begin(B), End(E), Loc(L), Merged(false) {} + DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, + const MDNode *V) + : Begin(B), End(E), Loc(L), Variable(V), Merged(false) {} /// Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. bool isEmpty() { return Begin == 0 && End == 0; } @@ -96,6 +84,43 @@ typedef struct DotDebugLocEntry { } } DotDebugLocEntry; +//===----------------------------------------------------------------------===// +/// DbgVariable - This class is used to track local variable information. +/// +class DbgVariable { + DIVariable Var; // Variable Descriptor. + DIE *TheDIE; // Variable DIE. + unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. +public: + // AbsVar may be NULL. + DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} + + // Accessors. 
+ DIVariable getVariable() const { return Var; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } + unsigned getTag() const { return Var.getTag(); } + bool variableHasComplexAddress() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.hasComplexAddress(); + } + bool isBlockByrefVariable() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(); + } + unsigned getNumAddrElements() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.getNumAddrElements(); + } + uint64_t getAddrElement(unsigned i) const { + return Var.getAddrElement(i); + } + DIType getType() const; +}; + class DwarfDebug { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -122,12 +147,6 @@ class DwarfDebug { /// id mapped to a unique id. StringMap<unsigned> SourceIdMap; - /// DIEBlocks - A list of all the DIEBlocks in use. - std::vector<DIEBlock *> DIEBlocks; - - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - /// StringPool - A String->Symbol mapping of strings used by indirect /// references. StringMap<std::pair<MCSymbol*, unsigned> > StringPool; @@ -139,10 +158,13 @@ class DwarfDebug { /// UniqueVector<const MCSection*> SectionMap; - // CurrentFnDbgScope - Top level scope for the current function. - // + /// CurrentFnDbgScope - Top level scope for the current function. + /// DbgScope *CurrentFnDbgScope; + /// CurrentFnArguments - List of Arguments (DbgValues) for current function. + SmallVector<DbgVariable *, 8> CurrentFnArguments; + /// DbgScopeMap - Tracks the scopes in the current function. Owns the /// contained DbgScope*s. /// @@ -195,10 +217,6 @@ class DwarfDebug { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; - typedef SmallVector<DbgScope *, 2> ScopeVector; - - SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet; - /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; @@ -217,9 +235,16 @@ class DwarfDebug { /// instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; - /// insnNeedsLabel - Collection of instructions that need a label to mark - /// a debuggging information entity. - SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel; + /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction + /// in order of appearance. + SmallVector<const MDNode*, 8> UserVariables; + + /// DbgValues - For each user variable, keep a list of DBG_VALUE + /// instructions in order. The list can also contain normal instructions that + /// clobber the previous DBG_VALUE. + typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> > + DbgValueHistoryMap; + DbgValueHistoryMap DbgValues; SmallVector<const MCSymbol *, 8> DebugRangeSymbols; @@ -238,6 +263,9 @@ class DwarfDebug { std::vector<FunctionDebugFrameInfo> DebugFrames; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. 
These are used to form // section offsets and are created by EmitSectionLabels. @@ -246,150 +274,12 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; - DIEInteger *DIEIntegerOne; private: - /// getNumSourceIds - Return the number of unique source ids. - unsigned getNumSourceIds() const { - return SourceIdMap.size(); - } - /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void assignAbbrevNumber(DIEAbbrev &Abbrev); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); - - /// addUInt - Add an unsigned integer attribute data and value. - /// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); - - /// addSInt - Add an signed integer attribute data and value. - /// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); - - /// addString - Add a string attribute data and value. - /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); - - /// addLabel - Add a Dwarf label attribute data and value. - /// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label); - - /// addDelta - Add a label delta attribute data and value. - /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); - - /// addDIEEntry - Add a DIE attribute data and value. - /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); - - /// addBlock - Add block data. - /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); - - /// addSourceLine - Add location information to specified debug information - /// entry. - void addSourceLine(DIE *Die, DIVariable V); - void addSourceLine(DIE *Die, DIGlobalVariable G); - void addSourceLine(DIE *Die, DISubprogram SP); - void addSourceLine(DIE *Die, DIType Ty); - void addSourceLine(DIE *Die, DINameSpace NS); - - /// addAddress - Add an address attribute to a die based on the location - /// provided. - void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addRegisterAddress - Add register location entry in variable DIE. - bool addRegisterAddress(DIE *Die, const MachineOperand &MO); - - /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO); - bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); - - /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - - /// addComplexAddress - Start with the address based on the location provided, - /// and generate the DWARF information necessary to find the actual variable - /// (navigating the extra location information encoded in the type) based on - /// the starting location. Add the DWARF information to the die. - /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - // FIXME: Should be reformulated in terms of addComplexAddress. - /// addBlockByrefAddress - Start with the address based on the location - /// provided, and generate the DWARF information necessary to find the - /// actual Block variable (navigating the Block struct) based on the - /// starting location. Add the DWARF information to the die. Obsolete, - /// please use addComplexAddress instead. 
- /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based - /// on provided frame index. - void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI); - - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); - - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); - - - /// getOrCreateNameSpace - Create a DIE for DINameSpace. - DIE *getOrCreateNameSpace(DINameSpace NS); - - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the - /// given DIType. - DIE *getOrCreateTypeDIE(DIType Ty); - - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. - DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); - - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateValueParameter. - DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); - - void addPubTypes(DISubprogram SP); - - /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, - DIBasicType BTy); - - /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, - DIDerivedType DTy); - - /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, - DICompositeType CTy); - - /// constructSubrangeDIE - Construct subrange DIE from DISubrange. - void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); - - /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy); - - /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - DIE *constructEnumTypeDIE(DIEnumerator ETy); - - /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(DIDerivedType DT); - - /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(DISubprogram SP); - /// getOrCreateDbgScope - Create DbgScope for the scope. DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt); @@ -504,11 +394,6 @@ private: /// inlining instance. void emitDebugInlineInfo(); - /// GetOrCreateSourceID - Look up the source id with the given directory and - /// source file names. If none currently exists, create a new id and insert it - /// in the SourceIds map. - unsigned GetOrCreateSourceID(StringRef FullName); - /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. void constructCompileUnit(const MDNode *N); @@ -525,7 +410,7 @@ private: /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. - MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); + void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); /// recordVariableFrameIndex - Record a variable's index. void recordVariableFrameIndex(const DbgVariable *V, int Index); @@ -546,6 +431,11 @@ private: /// and collect DbgScopes. Return true, if atleast one scope was found. 
bool extractScopeInformation(); + /// addCurrentFnArgument - If Var is an current function argument that add + /// it in CurrentFnArguments list. + bool addCurrentFnArgument(const MachineFunction *MF, + DbgVariable *Var, DbgScope *Scope); + /// collectVariableInfo - Populate DbgScope entries with variables' info. void collectVariableInfo(const MachineFunction *, SmallPtrSet<const MDNode *, 16> &ProcessedVars); @@ -554,6 +444,23 @@ private: /// side table maintained by MMI. void collectVariableInfoFromMMITable(const MachineFunction * MF, SmallPtrSet<const MDNode *, 16> &P); + + /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI. + void requestLabelBeforeInsn(const MachineInstr *MI) { + LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + } + + /// getLabelBeforeInsn - Return Label preceding the instruction. + const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + + /// requestLabelAfterInsn - Ensure that a label will be emitted after MI. + void requestLabelAfterInsn(const MachineInstr *MI) { + LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + } + + /// getLabelAfterInsn - Return Label immediately following the instruction. + const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -577,17 +484,19 @@ public: /// void endFunction(const MachineFunction *MF); - /// getLabelBeforeInsn - Return Label preceding the instruction. - const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - - /// getLabelAfterInsn - Return Label immediately following the instruction. - const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - /// beginInstruction - Process beginning of an instruction. void beginInstruction(const MachineInstr *MI); /// endInstruction - Prcess end of an instruction. void endInstruction(const MachineInstr *MI); + + /// GetOrCreateSourceID - Look up the source id with the given directory and + /// source file names. If none currently exists, create a new id and insert it + /// in the SourceIds map. + unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName); + + /// createSubprogramDIE - Create new DIE using SP. + DIE *createSubprogramDIE(DISubprogram SP); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index a172e53..f111641 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -140,17 +140,18 @@ public: }; class DwarfCFIException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; + /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality + /// should be emitted. + bool shouldEmitPersonality; + + /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda + /// should be emitted. + bool shouldEmitLSDA; /// shouldEmitMoves - Per-function flag to indicate if frame moves info /// should be emitted. bool shouldEmitMoves; - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; public: //===--------------------------------------------------------------------===// // Main entry points. @@ -237,6 +238,38 @@ public: virtual void EndFunction(); }; + +class ARMException : public DwarfException { + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. 
+ bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + ARMException(AsmPrinter *A); + virtual ~ARMException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + } // End of namespace llvm #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp index 7519011..b50d8bd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp @@ -92,7 +92,7 @@ void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) // personality function reference: unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(false); unsigned PerEncoding = TLOF.getPersonalityEncoding(); char Augmentation[6] = { 0 }; @@ -168,7 +168,7 @@ void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(false); Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 78a8743..7704340 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1048,7 +1048,7 @@ ReoptimizeBlock: // AnalyzeBranch. 
if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !MBB->hasAddressTaken()) { + !MBB->hasAddressTaken() && !MBB->isLandingPad()) { DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d7d0e1b..2ca3859 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMCodeGen GCStrategy.cpp IfConversion.cpp InlineSpiller.cpp + InterferenceCache.cpp IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 76bb3d1..e5894b8 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -87,8 +87,8 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, } void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo(); + MachineRegisterInfo &mri = MF.getRegInfo(); + const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; unsigned loopDepth = 0; @@ -103,6 +103,9 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // Don't recompute a target specific hint. bool noHint = mri.getRegAllocationHint(li.reg).first != 0; + // Don't recompute spill weight for an unspillable register. + bool Spillable = li.isSpillable(); + for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg); MachineInstr *mi = I.skipInstruction();) { if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) @@ -110,34 +113,37 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (!visited.insert(mi)) continue; - // Get loop info for mi. - if (mi->getParent() != mbb) { - mbb = mi->getParent(); - loop = loops_.getLoopFor(mbb); - loopDepth = loop ? loop->getLoopDepth() : 0; - isExiting = loop ? loop->isLoopExiting(mbb) : false; + float weight = 1.0f; + if (Spillable) { + // Get loop info for mi. + if (mi->getParent() != mbb) { + mbb = mi->getParent(); + loop = Loops.getLoopFor(mbb); + loopDepth = loop ? loop->getLoopDepth() : 0; + isExiting = loop ? loop->isLoopExiting(mbb) : false; + } + + // Calculate instr weight. + bool reads, writes; + tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + + // Give extra weight to what looks like a loop induction variable update. + if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) + weight *= 3; + + totalWeight += weight; } - // Calculate instr weight. - bool reads, writes; - tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); - - // Give extra weight to what looks like a loop induction variable update. - if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb)) - weight *= 3; - - totalWeight += weight; - // Get allocation hints from copies. 
if (noHint || !mi->isCopy()) continue; unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; - float hweight = hint_[hint] += weight; + float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && lis_.isAllocatable(hint)) + if (hweight > bestPhys && LIS.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; } else { if (hweight > bestVirt) @@ -145,15 +151,19 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } } - hint_.clear(); + Hint.clear(); // Always prefer the physreg hint. if (unsigned hint = hintPhys ? hintPhys : hintVirt) { mri.setRegAllocationHint(li.reg, 0, hint); - // Weakly boost the spill weifght of hinted registers. + // Weakly boost the spill weight of hinted registers. totalWeight *= 1.01F; } + // If the live interval was already unspillable, leave it that way. + if (!Spillable) + return; + // Mark li as unspillable if all live ranges are tiny. if (li.isZeroLength()) { li.markNotSpillable(); @@ -166,8 +176,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // FIXME: this gets much more complicated once we support non-trivial // re-materialization. bool isLoad = false; - SmallVector<LiveInterval*, 4> spillIs; - if (lis_.isReMaterializable(li, spillIs, isLoad)) { + if (LIS.isReMaterializable(li, 0, isLoad)) { if (isLoad) totalWeight *= 0.9F; else @@ -178,50 +187,29 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo(); - const TargetRegisterClass *orc = mri.getRegClass(reg); - SmallPtrSet<const TargetRegisterClass*,8> rcs; - - for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg), - E = mri.reg_nodbg_end(); I != E; ++I) { - // The targets don't have accurate enough regclass descriptions that we can - // handle subregs. We need something similar to - // TRI::getMatchingSuperRegClass, but returning a super class instead of a - // sub class. - if (I.getOperand().getSubReg()) { - DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n'); - return; - } - if (const TargetRegisterClass *rc = - I->getDesc().getRegClass(I.getOperandNo(), tri)) - rcs.insert(rc); - } + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterClass *OldRC = MRI.getRegClass(reg); + const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); - // If we found no regclass constraints, just leave reg as is. - // In theory, we could inflate to the largest superclass of reg's existing - // class, but that might not be legal for the current cpu setting. - // This could happen if reg is only used by COPY instructions, so we may need - // to improve on this. - if (rcs.empty()) { + // Stop early if there is no room to grow. + if (NewRC == OldRC) return; - } - // Compute the intersection of all classes in rcs. - // This ought to be independent of iteration order, but if the target register - // classes don't form a proper algebra, it is possible to get different - // results. The solution is to make sure the intersection of any two register - // classes is also a register class or the null set. - const TargetRegisterClass *rc = 0; - for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(), - E = rcs.end(); I != E; ++I) { - rc = rc ? 
getCommonSubClass(rc, *I) : *I; - assert(rc && "Incompatible regclass constraints found"); + // Accumulate constraints from all uses. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + // TRI doesn't have accurate enough information to model this yet. + if (I.getOperand().getSubReg()) + return; + const TargetRegisterClass *OpRC = + I->getDesc().getRegClass(I.getOperandNo(), TRI); + if (OpRC) + NewRC = getCommonSubClass(NewRC, OpRC); + if (!NewRC || NewRC == OldRC) + return; } - - if (rc == orc) - return; - DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg) - << " to " << rc->getName() <<".\n"); - mri.setRegClass(reg, rc); + DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg) + << " to " << NewRC->getName() <<".\n"); + MRI.setRegClass(reg, NewRC); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 2ad80b4..bfb6ba1 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -19,15 +19,18 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Invalid) { // No stack is used. StackOffset = 0; + clearFirstByValReg(); UsedRegs.resize((TRI.getNumRegs()+31)/32); } @@ -44,8 +47,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, Size = MinSize; if (MinAlign > (int)Align) Align = MinAlign; + TM.getTargetLowering()->HandleByVal(const_cast<CCState*>(this), Size); unsigned Offset = AllocateStack(Size, Align); - addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } @@ -155,7 +158,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { #ifndef NDEBUG dbgs() << "Call result #" << i << " has unhandled type " - << EVT(VT).getEVTString(); + << EVT(VT).getEVTString() << "\n"; #endif llvm_unreachable(0); } diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 91a9536..270c337 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements the pass that optimize code placement and align loop -// headers to target specific alignment boundary. +// This file implements the pass that optimizes code placement and aligns loop +// headers to target-specific alignment boundaries. // //===----------------------------------------------------------------------===// @@ -40,7 +40,7 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { - return "Code Placement Optimizater"; + return "Code Placement Optimizer"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -254,7 +254,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // Determine a position to move orphaned loop blocks to. If TopMBB is not // entered via fallthrough and BotMBB is exited via fallthrough, prepend them - // to the top of the loop to avoid loosing that fallthrough. 
Otherwise append + // to the top of the loop to avoid losing that fallthrough. Otherwise append // them to the bottom, even if it previously had a fallthrough, on the theory // that it's worth an extra branch to keep the loop contiguous. MachineFunction::iterator InsertPt = diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 0ebb5b0..34b1a39 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -93,7 +93,8 @@ namespace { /// with the eh.exception call. This recursively looks past instructions /// which don't change the EH pointer value, like casts or PHI nodes. bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet<IntrinsicInst*, 8> &SelCalls); + SmallPtrSet<IntrinsicInst*, 8> &SelCalls, + SmallPtrSet<PHINode*, 32> &SeenPHIs); public: static char ID; // Pass identification, replacement for typeid. @@ -199,8 +200,8 @@ bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { /// change the EH pointer value, like casts or PHI nodes. bool DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet<IntrinsicInst*, 8> &SelCalls) { - SmallPtrSet<PHINode*, 32> SeenPHIs; + SmallPtrSet<IntrinsicInst*, 8> &SelCalls, + SmallPtrSet<PHINode*, 32> &SeenPHIs) { bool Changed = false; for (Value::use_iterator @@ -215,11 +216,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, if (Invoke->getCalledFunction() == URoR) URoRInvoke = true; } else if (CastInst *CI = dyn_cast<CastInst>(II)) { - Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls); + Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs); } else if (PHINode *PN = dyn_cast<PHINode>(II)) { if (SeenPHIs.insert(PN)) // Don't process a PHI node more than once. - Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls); + Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs); } } @@ -294,7 +295,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() { bool URoRInvoke = false; SmallPtrSet<IntrinsicInst*, 8> SelCalls; - Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls); + SmallPtrSet<PHINode*, 32> SeenPHIs; + Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs); if (URoRInvoke) { // This EH pointer is being used by an invoke of an URoR instruction and @@ -437,8 +439,9 @@ bool DwarfEHPrepare::NormalizeLandingPads() { if (InVal == 0) { // Different unwind edges have different values. Create a new PHI node // in NewBB. - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind", - NewBB); + PHINode *NewPN = PHINode::Create(PN->getType(), + PN->getNumIncomingValues(), + PN->getName()+".unwind", NewBB); // Add an entry for each unwind edge, using the value from the old PHI. for (pred_iterator PI = PB; PI != PE; ++PI) NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index e08feeb..5b63468 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -173,7 +173,7 @@ namespace llvm { unsigned Offset; // sh_offset - Offset from the file start unsigned Size; // sh_size - The section size. unsigned Link; // sh_link - Section header table index link. - unsigned Info; // sh_info - Auxillary information. + unsigned Info; // sh_info - Auxiliary information. unsigned Align; // sh_addralign - Alignment of section. 
unsigned EntSize; // sh_entsize - Size of entries in the section e diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 0fd1e8e..fa2319b 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -77,7 +77,7 @@ ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) // Create the object code emitter object for this target. ElfCE = new ELFCodeEmitter(*this); - // Inital number of sections + // Initial number of sections NumSections = 0; } @@ -660,19 +660,21 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the /// function pointers, ignoring the init priority. void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { - // Should be an array of '{ int, void ()* }' structs. The first value is the + // Should be an array of '{ i32, void ()* }' structs. The first value is the // init priority, which we ignore. - if (!isa<ConstantArray>(List)) return; + if (List->isNullValue()) return; ConstantArray *InitList = cast<ConstantArray>(List); - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ - if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. - - if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. - // Emit the function pointer. - EmitGlobalConstant(CS->getOperand(1), Xtor); - } + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + if (InitList->getOperand(i)->isNullValue()) + continue; + ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i)); + + if (CS->getOperand(1)->isNullValue()) + continue; + + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1), Xtor); + } } bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index aed8bc9..646e014 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -53,6 +53,19 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { EC.compress(); if (ViewEdgeBundles) view(); + + // Compute the reverse mapping. + Blocks.clear(); + Blocks.resize(getNumBundles()); + + for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { + unsigned b0 = getBundle(i, 0); + unsigned b1 = getBundle(i, 1); + Blocks[b0].push_back(i); + if (b1 != b0) + Blocks[b1].push_back(i); + } + return false; } @@ -82,5 +95,3 @@ raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, O << "}\n"; return O; } - - diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index b5ec303..ebc2fc9 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Expand Psuedo-instructions produced by ISel. These are usually to allow +// Expand Pseudo-instructions produced by ISel. These are usually to allow // the expansion to contain control flow, such as a conditional move // implemented with a conditional branch and a phi, or an atomic operation // implemented with a loop. 
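The EdgeBundles change above caches a reverse mapping from bundle numbers to the basic blocks connected to them, so clients can enumerate a bundle's blocks without rescanning every block in the function. Below is a minimal sketch of that idea; it assumes a getBundle(block, side) lookup (side 0 for the bundle entering the block, side 1 for the bundle leaving it), and the names ReverseBundleMap and compute are illustrative stand-ins, not LLVM's API.

    #include <vector>

    struct ReverseBundleMap {
      // Bundle number -> numbers of the basic blocks connected to it.
      std::vector<std::vector<unsigned> > Blocks;

      template <typename GetBundleFn>
      void compute(unsigned NumBlocks, unsigned NumBundles,
                   GetBundleFn getBundle) {
        Blocks.clear();
        Blocks.resize(NumBundles);
        for (unsigned i = 0; i != NumBlocks; ++i) {
          unsigned b0 = getBundle(i, 0); // bundle of ingoing edges
          unsigned b1 = getBundle(i, 1); // bundle of outgoing edges
          Blocks[b0].push_back(i);
          if (b1 != b0)                  // don't record a block twice
            Blocks[b1].push_back(i);
        }
      }
    };

Building the table once per function costs O(blocks) time and memory, but it turns each later "which blocks does bundle b touch?" query into a plain vector lookup, which suits allocator code that queries bundles repeatedly during live range splitting.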
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index db53b04..790200b 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -146,10 +145,6 @@ namespace { : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} }; - /// Roots - Basic blocks that do not have successors. These are the starting - /// points of Graph traversal. - std::vector<MachineBasicBlock*> Roots; - /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. std::vector<BBInfo> BBAnalysis; @@ -287,11 +282,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); - // Look for root nodes, i.e. blocks without successors. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - if (I->succ_empty()) - Roots.push_back(I); - std::vector<IfcvtToken*> Tokens; MadeChange = false; unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + @@ -406,7 +396,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } Tokens.clear(); - Roots.clear(); BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { @@ -924,13 +913,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, /// candidates. void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens) { - std::set<MachineBasicBlock*> Visited; - for (unsigned i = 0, e = Roots.size(); i != e; ++i) { - for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited), - E = idf_ext_end(Roots[i], Visited); I != E; ++I) { - MachineBasicBlock *BB = *I; - AnalyzeBlock(BB, Tokens); - } + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + AnalyzeBlock(BB, Tokens); } // Sort to favor more complex ifcvt scheme. 
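The IfConversion change above removes the Roots vector and the inverse depth-first walk seeded at successor-less blocks, analyzing every block with a straight linear scan instead. One observable difference, sketched here with simplified stand-in types rather than MachineBasicBlock: an inverse DFS from exit blocks can never reach a block with no path to an exit (an infinite loop, for instance), whereas the linear scan covers every block exactly once and needs no Visited bookkeeping.

    #include <cstddef>
    #include <set>
    #include <vector>

    struct Block {
      unsigned Number;
      std::vector<Block*> Preds; // predecessor blocks
    };

    // Old scheme (simplified): walk backwards from blocks without
    // successors; anything that cannot reach such a block is skipped.
    static void analyzeInverseDFS(Block *BB, std::set<Block*> &Visited,
                                  std::vector<Block*> &Order) {
      if (!Visited.insert(BB).second)
        return; // already analyzed
      Order.push_back(BB);
      for (std::size_t i = 0; i != BB->Preds.size(); ++i)
        analyzeInverseDFS(BB->Preds[i], Visited, Order);
    }

    // New scheme: visit blocks in layout order; coverage is total.
    static void analyzeLinear(std::vector<Block*> &Blocks,
                              std::vector<Block*> &Order) {
      for (std::size_t i = 0; i != Blocks.size(); ++i)
        Order.push_back(Blocks[i]);
    }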
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 38e6c85..b1a33a6 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -19,41 +19,75 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt<bool> -VerifySpills("verify-spills", cl::desc("Verify after each spill/split")); - namespace { class InlineSpiller : public Spiller { - MachineFunctionPass &pass_; - MachineFunction &mf_; - LiveIntervals &lis_; - LiveStacks &lss_; - AliasAnalysis *aa_; - VirtRegMap &vrm_; - MachineFrameInfo &mfi_; - MachineRegisterInfo &mri_; - const TargetInstrInfo &tii_; - const TargetRegisterInfo &tri_; - const BitVector reserved_; + MachineFunctionPass &Pass; + MachineFunction &MF; + LiveIntervals &LIS; + LiveStacks &LSS; + AliasAnalysis *AA; + MachineDominatorTree &MDT; + MachineLoopInfo &Loops; + VirtRegMap &VRM; + MachineFrameInfo &MFI; + MachineRegisterInfo &MRI; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; // Variables that are valid during spill(), but used by multiple methods. - LiveRangeEdit *edit_; - const TargetRegisterClass *rc_; - int stackSlot_; + LiveRangeEdit *Edit; + LiveInterval *StackInt; + int StackSlot; + unsigned Original; + + // All registers to spill to StackSlot, including the main register. + SmallVector<unsigned, 8> RegsToSpill; + + // All COPY instructions to/from snippets. + // They are ignored since both operands refer to the same stack slot. + SmallPtrSet<MachineInstr*, 8> SnippetCopies; // Values that failed to remat at some point. - SmallPtrSet<VNInfo*, 8> usedValues_; + SmallPtrSet<VNInfo*, 8> UsedValues; + + // Information about a value that was defined by a copy from a sibling + // register. + struct SibValueInfo { + // True when all reaching defs were reloads: No spill is necessary. + bool AllDefsAreReloads; + + // The preferred register to spill. + unsigned SpillReg; + + // The value of SpillReg that should be spilled. + VNInfo *SpillVNI; + + // A defining instruction that is not a sibling copy or a reload, or NULL. + // This can be used as a template for rematerialization. + MachineInstr *DefMI; + + SibValueInfo(unsigned Reg, VNInfo *VNI) + : AllDefsAreReloads(false), SpillReg(Reg), SpillVNI(VNI), DefMI(0) {} + }; + + // Values in RegsToSpill defined by sibling copies. + typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap; + SibValueMap SibValues; + + // Dead defs generated during spilling. 
+ SmallVector<MachineInstr*, 8> DeadDefs; ~InlineSpiller() {} @@ -61,34 +95,52 @@ public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : pass_(pass), - mf_(mf), - lis_(pass.getAnalysis<LiveIntervals>()), - lss_(pass.getAnalysis<LiveStacks>()), - aa_(&pass.getAnalysis<AliasAnalysis>()), - vrm_(vrm), - mfi_(*mf.getFrameInfo()), - mri_(mf.getRegInfo()), - tii_(*mf.getTarget().getInstrInfo()), - tri_(*mf.getTarget().getRegisterInfo()), - reserved_(tri_.getReservedRegs(mf_)) {} - - void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &spillIs); + : Pass(pass), + MF(mf), + LIS(pass.getAnalysis<LiveIntervals>()), + LSS(pass.getAnalysis<LiveStacks>()), + AA(&pass.getAnalysis<AliasAnalysis>()), + MDT(pass.getAnalysis<MachineDominatorTree>()), + Loops(pass.getAnalysis<MachineLoopInfo>()), + VRM(vrm), + MFI(*mf.getFrameInfo()), + MRI(mf.getRegInfo()), + TII(*mf.getTarget().getInstrInfo()), + TRI(*mf.getTarget().getRegisterInfo()) {} void spill(LiveRangeEdit &); private: - bool reMaterializeFor(MachineBasicBlock::iterator MI); + bool isSnippet(const LiveInterval &SnipLI); + void collectRegsToSpill(); + + bool isRegToSpill(unsigned Reg) { + return std::find(RegsToSpill.begin(), + RegsToSpill.end(), Reg) != RegsToSpill.end(); + } + + bool isSibling(unsigned Reg); + MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); + void analyzeSiblingValues(); + + bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); + void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); + + void markValueUsed(LiveInterval*, VNInfo*); + bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI); void reMaterializeAll(); - bool coalesceStackAccess(MachineInstr *MI); + bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); - void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertReload(LiveInterval &NewLI, SlotIndex, + MachineBasicBlock::iterator MI); + void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, + SlotIndex, MachineBasicBlock::iterator MI); + + void spillAroundUses(unsigned Reg); + void spillAll(); }; } @@ -96,45 +148,489 @@ namespace llvm { Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { - if (VerifySpills) - mf.verify(&pass, "When creating inline spiller"); return new InlineSpiller(pass, mf, vrm); } } +//===----------------------------------------------------------------------===// +// Snippets +//===----------------------------------------------------------------------===// + +// When spilling a virtual register, we also spill any snippets it is connected +// to. The snippets are small live ranges that only have a single real use, +// leftovers from live range splitting. Spilling them enables memory operand +// folding or tightens the live range around the single use. +// +// This minimizes register pressure and maximizes the store-to-load distance for +// spill slots which can be important in tight loops. + +/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, +/// otherwise return 0. 
+static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) { + if (!MI->isCopy()) + return 0; + if (MI->getOperand(0).getSubReg() != 0) + return 0; + if (MI->getOperand(1).getSubReg() != 0) + return 0; + if (MI->getOperand(0).getReg() == Reg) + return MI->getOperand(1).getReg(); + if (MI->getOperand(1).getReg() == Reg) + return MI->getOperand(0).getReg(); + return 0; +} + +/// isSnippet - Identify if a live interval is a snippet that should be spilled. +/// It is assumed that SnipLI is a virtual register with the same original as +/// Edit->getReg(). +bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { + unsigned Reg = Edit->getReg(); + + // A snippet is a tiny live range with only a single instruction using it + // besides copies to/from Reg or spills/fills. We accept: + // + // %snip = COPY %Reg / FILL fi# + // %snip = USE %snip + // %Reg = COPY %snip / SPILL %snip, fi# + // + if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)) + return false; + + MachineInstr *UseMI = 0; + + // Check that all uses satisfy our criteria. + for (MachineRegisterInfo::reg_nodbg_iterator + RI = MRI.reg_nodbg_begin(SnipLI.reg); + MachineInstr *MI = RI.skipInstruction();) { + + // Allow copies to/from Reg. + if (isFullCopyOf(MI, Reg)) + continue; + + // Allow stack slot loads. + int FI; + if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow stack slot stores. + if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow a single additional instruction. + if (UseMI && MI != UseMI) + return false; + UseMI = MI; + } + return true; +} + +/// collectRegsToSpill - Collect live range snippets that only have a single +/// real use. +void InlineSpiller::collectRegsToSpill() { + unsigned Reg = Edit->getReg(); + + // Main register always spills. + RegsToSpill.assign(1, Reg); + SnippetCopies.clear(); + + // Snippets all have the same original, so there can't be any for an original + // register. + if (Original == Reg) + return; + + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); + MachineInstr *MI = RI.skipInstruction();) { + unsigned SnipReg = isFullCopyOf(MI, Reg); + if (!isSibling(SnipReg)) + continue; + LiveInterval &SnipLI = LIS.getInterval(SnipReg); + if (!isSnippet(SnipLI)) + continue; + SnippetCopies.insert(MI); + if (!isRegToSpill(SnipReg)) + RegsToSpill.push_back(SnipReg); + + DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); + } +} + + +//===----------------------------------------------------------------------===// +// Sibling Values +//===----------------------------------------------------------------------===// + +// After live range splitting, some values to be spilled may be defined by +// copies from sibling registers. We trace the sibling copies back to the +// original value if it still exists. We need it for rematerialization. +// +// Even when the value can't be rematerialized, we still want to determine if +// the value has already been spilled, or we may want to hoist the spill from a +// loop. + +bool InlineSpiller::isSibling(unsigned Reg) { + return TargetRegisterInfo::isVirtualRegister(Reg) && + VRM.getOriginal(Reg) == Original; +} + +/// traceSiblingValue - Trace a value that is about to be spilled back to the +/// real defining instructions by looking through sibling copies. Always stay +/// within the range of OrigVNI so the registers are known to carry the same +/// value. 
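+/// Sibling registers all share Original's stack slot, so a sibling def that dominates the use is also a legal place to spill.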
+/// +/// Determine if the value is defined by all reloads, so spilling isn't +/// necessary - the value is already in the stack slot. +/// +/// Return a defining instruction that may be a candidate for rematerialization. +/// +MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, + VNInfo *OrigVNI) { + DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':' + << UseVNI->id << '@' << UseVNI->def << '\n'); + SmallPtrSet<VNInfo*, 8> Visited; + SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(UseReg, UseVNI)); + + // Best spill candidate seen so far. This must dominate UseVNI. + SibValueInfo SVI(UseReg, UseVNI); + MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def); + unsigned SpillDepth = Loops.getLoopDepth(UseMBB); + bool SeenOrigPHI = false; // Original PHI met. + + do { + unsigned Reg; + VNInfo *VNI; + tie(Reg, VNI) = WorkList.pop_back_val(); + if (!Visited.insert(VNI)) + continue; + + // Is this value a better spill candidate? + if (!isRegToSpill(Reg)) { + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) { + // This is a valid spill location dominating UseVNI. + // Prefer to spill at a smaller loop depth. + unsigned Depth = Loops.getLoopDepth(MBB); + if (Depth < SpillDepth) { + DEBUG(dbgs() << " spill depth " << Depth << ": " << PrintReg(Reg) + << ':' << VNI->id << '@' << VNI->def << '\n'); + SVI.SpillReg = Reg; + SVI.SpillVNI = VNI; + SpillDepth = Depth; + } + } + } + + // Trace through PHI-defs created by live range splitting. + if (VNI->isPHIDef()) { + if (VNI->def == OrigVNI->def) { + DEBUG(dbgs() << " orig phi value " << PrintReg(Reg) << ':' + << VNI->id << '@' << VNI->def << '\n'); + SeenOrigPHI = true; + continue; + } + // Get values live-out of predecessors. + LiveInterval &LI = LIS.getInterval(Reg); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + VNInfo *PVNI = LI.getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + if (PVNI) + WorkList.push_back(std::make_pair(Reg, PVNI)); + } + continue; + } + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + + // Trace through sibling copies. + if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { + if (isSibling(SrcReg)) { + LiveInterval &SrcLI = LIS.getInterval(SrcReg); + VNInfo *SrcVNI = SrcLI.getVNInfoAt(VNI->def.getUseIndex()); + assert(SrcVNI && "Copy from non-existing value"); + DEBUG(dbgs() << " copy of " << PrintReg(SrcReg) << ':' + << SrcVNI->id << '@' << SrcVNI->def << '\n'); + WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); + continue; + } + } + + // Track reachable reloads. + int FI; + if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << " reload " << PrintReg(Reg) << ':' + << VNI->id << "@" << VNI->def << '\n'); + SVI.AllDefsAreReloads = true; + continue; + } + + // We have an 'original' def. Don't record trivial cases. + if (VNI == UseVNI) { + DEBUG(dbgs() << "Not a sibling copy.\n"); + return MI; + } + + // Potential remat candidate. 
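+ // The last such def visited wins: it becomes SVI.DefMI, the template used for rematerialization.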
+ DEBUG(dbgs() << " def " << PrintReg(Reg) << ':' + << VNI->id << '@' << VNI->def << '\t' << *MI); + SVI.DefMI = MI; + } while (!WorkList.empty()); + + if (SeenOrigPHI || SVI.DefMI) + SVI.AllDefsAreReloads = false; + + DEBUG({ + if (SVI.AllDefsAreReloads) + dbgs() << "All defs are reloads.\n"; + else + dbgs() << "Prefer to spill " << PrintReg(SVI.SpillReg) << ':' + << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def << '\n'; + }); + SibValues.insert(std::make_pair(UseVNI, SVI)); + return SVI.DefMI; +} + +/// analyzeSiblingValues - Trace values defined by sibling copies back to +/// something that isn't a sibling copy. +/// +/// Keep track of values that may be rematerializable. +void InlineSpiller::analyzeSiblingValues() { + SibValues.clear(); + + // No siblings at all? + if (Edit->getReg() == Original) + return; + + LiveInterval &OrigLI = LIS.getInterval(Original); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), + VE = LI.vni_end(); VI != VE; ++VI) { + VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = 0; + // Check possible sibling copies. + if (VNI->isPHIDef() || VNI->getCopy()) { + VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + if (OrigVNI->def != VNI->def) + DefMI = traceSiblingValue(Reg, VNI, OrigVNI); + } + if (!DefMI && !VNI->isPHIDef()) + DefMI = LIS.getInstructionFromIndex(VNI->def); + if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' + << VNI->def << " may remat from " << *DefMI); + } + } + } +} + +/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert +/// a spill at a better location. +bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { + SlotIndex Idx = LIS.getInstructionIndex(CopyMI); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); + assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); + SibValueMap::iterator I = SibValues.find(VNI); + if (I == SibValues.end()) + return false; + + const SibValueInfo &SVI = I->second; + + // Let the normal folding code deal with the boring case. + if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) + return false; + + // SpillReg may have been deleted by remat and DCE. + if (!LIS.hasInterval(SVI.SpillReg)) { + DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); + if (!SibLI.containsValue(SVI.SpillVNI)) { + DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + // Conservatively extend the stack slot range to the range of the original + // value. We may be able to do better with stack slot coloring by being more + // careful here. + assert(StackInt && "No stack slot assigned yet."); + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); + StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " + << *StackInt << '\n'); + + // Already spilled everywhere. + if (SVI.AllDefsAreReloads) + return true; + + // We are going to spill SVI.SpillVNI immediately after its def, so clear out + // any later spills of the same value. 
+ eliminateRedundantSpills(SibLI, SVI.SpillVNI); + + MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); + MachineBasicBlock::iterator MII; + if (SVI.SpillVNI->isPHIDef()) + MII = MBB->SkipPHIsAndLabels(MBB->begin()); + else { + MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + assert(DefMI && "Defining instruction disappeared"); + MII = DefMI; + ++MII; + } + // Insert spill without kill flag immediately after def. + TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot, + MRI.getRegClass(SVI.SpillReg), &TRI); + --MII; // Point to store instruction. + LIS.InsertMachineInstrInMaps(MII); + VRM.addSpillSlotUse(StackSlot, MII); + DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); + return true; +} + +/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any +/// redundant spills of this value in SLI.reg and sibling copies. +void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { + assert(VNI && "Missing value"); + SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(&SLI, VNI)); + assert(StackInt && "No stack slot assigned yet."); + + do { + LiveInterval *LI; + tie(LI, VNI) = WorkList.pop_back_val(); + unsigned Reg = LI->reg; + DEBUG(dbgs() << "Checking redundant spills for " + << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); + + // Regs to spill are taken care of. + if (isRegToSpill(Reg)) + continue; + + // Add all of VNI's live range to StackInt. + StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); + + // Find all spills and copies of VNI. + for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); + MachineInstr *MI = UI.skipInstruction();) { + if (!MI->isCopy() && !MI->getDesc().mayStore()) + continue; + SlotIndex Idx = LIS.getInstructionIndex(MI); + if (LI->getVNInfoAt(Idx) != VNI) + continue; + + // Follow sibling copies down the dominator tree. + if (unsigned DstReg = isFullCopyOf(MI, Reg)) { + if (isSibling(DstReg)) { + LiveInterval &DstLI = LIS.getInterval(DstReg); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex()); + assert(DstVNI && "Missing defined value"); + assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot"); + WorkList.push_back(std::make_pair(&DstLI, DstVNI)); + } + continue; + } + + // Erase spills. + int FI; + if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI); + // eliminateDeadDefs won't normally remove stores, so switch opcode. + MI->setDesc(TII.get(TargetOpcode::KILL)); + DeadDefs.push_back(MI); + } + } + } while (!WorkList.empty()); +} + + +//===----------------------------------------------------------------------===// +// Rematerialization +//===----------------------------------------------------------------------===// + +/// markValueUsed - Remember that VNI failed to rematerialize, so its defining +/// instruction cannot be eliminated. 
See through snippet copies +void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { + SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(LI, VNI)); + do { + tie(LI, VNI) = WorkList.pop_back_val(); + if (!UsedValues.insert(VNI)) + continue; + + if (VNI->isPHIDef()) { + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + if (PVNI) + WorkList.push_back(std::make_pair(LI, PVNI)); + } + continue; + } + + // Follow snippet copies. + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + if (!SnippetCopies.count(MI)) + continue; + LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); + assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex()); + assert(SnipVNI && "Snippet undefined before copy"); + WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); + } while (!WorkList.empty()); +} + /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. -bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { - SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); - VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx); +bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, + MachineBasicBlock::iterator MI) { + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); + VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx); - if (!OrigVNI) { + if (!ParentVNI) { DEBUG(dbgs() << "\tadding <undef> flags: "); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) MO.setIsUndef(); } DEBUG(dbgs() << UseIdx << '\t' << *MI); return true; } - LiveRangeEdit::Remat RM(OrigVNI); - if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) { - usedValues_.insert(OrigVNI); + if (SnippetCopies.count(MI)) + return false; + + // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy. + LiveRangeEdit::Remat RM(ParentVNI); + SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); + if (SibI != SibValues.end()) + RM.OrigMI = SibI->second.DefMI; + if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) { + markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; } - // If the instruction also writes edit_->getReg(), it had better not require - // the same register for uses and defs. + // If the instruction also writes VirtReg.reg, it had better not require the + // same register for uses and defs. bool Reads, Writes; SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops); if (Writes) { for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) { - usedValues_.insert(OrigVNI); + markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); return false; } @@ -145,35 +641,31 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { // fold a load into the instruction. That avoids allocating a new register. 
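// Only an OrigMI marked canFoldAsLoad qualifies; foldMemoryOperand() then replaces MI with the folded instruction, so no new register is needed.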
if (RM.OrigMI->getDesc().canFoldAsLoad() && foldMemoryOperand(MI, Ops, RM.OrigMI)) { - edit_->markRematerialized(RM.ParentVNI); + Edit->markRematerialized(RM.ParentVNI); return true; } // Allocate a new register for the remat. - LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_); + LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM); NewLI.markNotSpillable(); - // Rematting for a copy: Set allocation hint to be the destination register. - if (MI->isCopy()) - mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg()); - // Finally we can rematerialize OrigMI before MI. - SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, - lis_, tii_, tri_); + SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + LIS, TII, TRI); DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' - << *lis_.getInstructionFromIndex(DefIdx)); + << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); - if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) { + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewLI.reg); MO.setIsKill(); } } DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator()); + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); return true; @@ -182,75 +674,85 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { /// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { - // Do a quick scan of the interval values to find if any are remattable. - if (!edit_->anyRematerializable(lis_, tii_, aa_)) + // analyzeSiblingValues has already tested all relevant defining instructions. + if (!Edit->anyRematerializable(LIS, TII, AA)) return; - usedValues_.clear(); + UsedValues.clear(); - // Try to remat before all uses of edit_->getReg(). + // Try to remat before all uses of snippets. bool anyRemat = false; - for (MachineRegisterInfo::use_nodbg_iterator - RI = mri_.use_nodbg_begin(edit_->getReg()); - MachineInstr *MI = RI.skipInstruction();) - anyRemat |= reMaterializeFor(MI); - + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (MachineRegisterInfo::use_nodbg_iterator + RI = MRI.use_nodbg_begin(Reg); + MachineInstr *MI = RI.skipInstruction();) + anyRemat |= reMaterializeFor(LI, MI); + } if (!anyRemat) return; // Remove any values that were completely rematted.
- bool anyRemoved = false; - for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(), - E = edit_->getParent().vni_end(); I != E; ++I) { - VNInfo *VNI = *I; - if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) || - usedValues_.count(VNI)) - continue; - MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); - DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); - lis_.RemoveMachineInstrFromMaps(DefMI); - vrm_.RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); - VNI->def = SlotIndex(); - anyRemoved = true; + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)) + continue; + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + MI->addRegisterDead(Reg, &TRI); + if (!MI->allDefsAreDead()) + continue; + DEBUG(dbgs() << "All defs dead: " << *MI); + DeadDefs.push_back(MI); + } } - if (!anyRemoved) + // Eliminate dead code after remat. Note that some snippet copies may be + // deleted here. + if (DeadDefs.empty()) return; - - // Removing values may cause debug uses where parent is not live. - for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg()); - MachineInstr *MI = RI.skipInstruction();) { - if (!MI->isDebugValue()) + DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); + Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + + // Get rid of deleted and empty intervals. + for (unsigned i = RegsToSpill.size(); i != 0; --i) { + unsigned Reg = RegsToSpill[i-1]; + if (!LIS.hasInterval(Reg)) { + RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); continue; - // Try to preserve the debug value if parent is live immediately after it. - MachineBasicBlock::iterator NextMI = MI; - ++NextMI; - if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { - SlotIndex Idx = lis_.getInstructionIndex(NextMI); - VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); - if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) - continue; } - DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); - MI->eraseFromParent(); + LiveInterval &LI = LIS.getInterval(Reg); + if (!LI.empty()) + continue; + Edit->eraseVirtReg(Reg, LIS); + RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); } + DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); } -/// If MI is a load or store of stackSlot_, it can be removed. -bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { + +//===----------------------------------------------------------------------===// +// Spilling +//===----------------------------------------------------------------------===// + +/// If MI is a load or store of StackSlot, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { int FI = 0; - unsigned reg; - if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) && - !(reg = tii_.isStoreToStackSlot(MI, FI))) + unsigned InstrReg; + if (!(InstrReg = TII.isLoadFromStackSlot(MI, FI)) && + !(InstrReg = TII.isStoreToStackSlot(MI, FI))) return false; // We have a stack access. Is it the right register and slot? 
- if (reg != edit_->getReg() || FI != stackSlot_) + if (InstrReg != Reg || FI != StackSlot) return false; DEBUG(dbgs() << "Coalescing stack access: " << *MI); - lis_.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); return true; } @@ -283,13 +785,13 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, } MachineInstr *FoldMI = - LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI) - : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) + : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; - lis_.ReplaceMachineInstrInMaps(MI, FoldMI); + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); if (!LoadMI) - vrm_.addSpillSlotUse(stackSlot_, FoldMI); + VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); DEBUG(dbgs() << "\tfolded: " << *FoldMI); return true; @@ -297,84 +799,40 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, /// insertReload - Insert a reload of NewLI.reg before MI. void InlineSpiller::insertReload(LiveInterval &NewLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); - tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_); + TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, + MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. - SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - vrm_.addSpillSlotUse(stackSlot_, MI); + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); + VRM.addSpillSlotUse(StackSlot, MI); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, - lis_.getVNInfoAllocator()); + LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); } /// insertSpill - Insert a spill of NewLI.reg after MI. -void InlineSpiller::insertSpill(LiveInterval &NewLI, - MachineBasicBlock::iterator MI) { +void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - - // Get the defined value. It could be an early clobber so keep the def index. - SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); - VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); - assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo"); - Idx = VNI->def; - - tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); + TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, + MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. 
- SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - vrm_.addSpillSlotUse(stackSlot_, MI); + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); + VRM.addSpillSlotUse(StackSlot, MI); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator()); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); } -void InlineSpiller::spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &spillIs) { - LiveRangeEdit edit(*li, newIntervals, spillIs); - spill(edit); - if (VerifySpills) - mf_.verify(&pass_, "After inline spill"); -} - -void InlineSpiller::spill(LiveRangeEdit &edit) { - edit_ = &edit; - assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) - && "Trying to spill a stack slot."); - DEBUG(dbgs() << "Inline spilling " - << mri_.getRegClass(edit.getReg())->getName() - << ':' << edit.getParent() << "\nFrom original " - << PrintReg(vrm_.getOriginal(edit.getReg())) << '\n'); - assert(edit.getParent().isSpillable() && - "Attempting to spill already spilled value."); - - reMaterializeAll(); +/// spillAroundUses - insert spill code around each use of Reg. +void InlineSpiller::spillAroundUses(unsigned Reg) { + LiveInterval &OldLI = LIS.getInterval(Reg); - // Remat may handle everything. - if (edit_->getParent().empty()) - return; - - rc_ = mri_.getRegClass(edit.getReg()); - - // Share a stack slot among all descendants of Orig. - unsigned Orig = vrm_.getOriginal(edit.getReg()); - stackSlot_ = vrm_.getStackSlot(Orig); - if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) - stackSlot_ = vrm_.assignVirt2StackSlot(Orig); - - if (Orig != edit.getReg()) - vrm_.assignVirt2StackSlot(edit.getReg(), stackSlot_); - - // Update LiveStacks now that we are committed to spilling. - LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_); - if (!stacklvr.hasAtLeastOneValue()) - stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); - stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0)); - - // Iterate over instructions using register. - for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg()); + // Iterate over instructions using Reg. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); MachineInstr *MI = RI.skipInstruction();) { // Debug values are not allowed to affect codegen. @@ -383,7 +841,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { uint64_t Offset = MI->getOperand(1).getImm(); const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_, + if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, Offset, MDPtr, DL)) { DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); @@ -395,14 +853,44 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { continue; } + // Ignore copies to/from snippets. We'll delete them. + if (SnippetCopies.count(MI)) + continue; + // Stack slot accesses may coalesce away. - if (coalesceStackAccess(MI)) + if (coalesceStackAccess(MI, Reg)) continue; // Analyze instruction. 
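// readsWritesVirtualRegister also collects into Ops the indices of MI's operands that use or define Reg.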
bool Reads, Writes; SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops); + + // Find the slot index where this instruction reads and writes OldLI. + // This is usually the def slot, except for tied early clobbers. + SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + if (SlotIndex::isSameInstr(Idx, VNI->def)) + Idx = VNI->def; + + // Check for a sibling copy. + unsigned SibReg = isFullCopyOf(MI, Reg); + if (SibReg && isSibling(SibReg)) { + if (Writes) { + // Hoist the spill of a sib-reg copy. + if (hoistSpill(OldLI, MI)) { + // This COPY is now dead, the value is already in the stack slot. + MI->getOperand(0).setIsDead(); + DeadDefs.push_back(MI); + continue; + } + } else { + // This is a reload for a sib-reg copy. Drop spills downstream. + LiveInterval &SibLI = LIS.getInterval(SibReg); + eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx)); + // The COPY will fold to a reload below. + } + } // Attempt to fold memory ops. if (foldMemoryOperand(MI, Ops)) @@ -410,11 +898,11 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = edit.create(mri_, lis_, vrm_); + LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM); NewLI.markNotSpillable(); if (Reads) - insertReload(NewLI, MI); + insertReload(NewLI, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; @@ -429,11 +917,84 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { hasLiveDef = true; } } + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. if (Writes && hasLiveDef) - insertSpill(NewLI, MI); + insertSpill(NewLI, OldLI, Idx, MI); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); } } + +/// spillAll - Spill all registers remaining after rematerialization. +void InlineSpiller::spillAll() { + // Update LiveStacks now that we are committed to spilling. + if (StackSlot == VirtRegMap::NO_STACK_SLOT) { + StackSlot = VRM.assignVirt2StackSlot(Original); + StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original)); + StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator()); + } else + StackInt = &LSS.getInterval(StackSlot); + + if (Original != Edit->getReg()) + VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); + + assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]), + StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); + + // Spill around uses of all RegsToSpill. + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + spillAroundUses(RegsToSpill[i]); + + // Hoisted spills may cause dead code. + if (!DeadDefs.empty()) { + DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); + Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + } + + // Finally delete the SnippetCopies. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()); + MachineInstr *MI = RI.skipInstruction();) { + assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); + // FIXME: Do this with a LiveRangeEdit callback. 
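+ // Unmap the copy from VirtRegMap and LiveIntervals before erasing it.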
+ VRM.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + + // Delete all spilled registers. + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + Edit->eraseVirtReg(RegsToSpill[i], LIS); +} + +void InlineSpiller::spill(LiveRangeEdit &edit) { + Edit = &edit; + assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) + && "Trying to spill a stack slot."); + // Share a stack slot among all descendants of Original. + Original = VRM.getOriginal(edit.getReg()); + StackSlot = VRM.getStackSlot(Original); + StackInt = 0; + + DEBUG(dbgs() << "Inline spilling " + << MRI.getRegClass(edit.getReg())->getName() + << ':' << edit.getParent() << "\nFrom original " + << LIS.getInterval(Original) << '\n'); + assert(edit.getParent().isSpillable() && + "Attempting to spill already spilled value."); + assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); + + collectRegsToSpill(); + analyzeSiblingValues(); + reMaterializeAll(); + + // Remat may handle everything. + if (!RegsToSpill.empty()) + spillAll(); + + Edit->calculateRegClassAndHint(MF, LIS, Loops); +} diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp new file mode 100644 index 0000000..b1014a9 --- /dev/null +++ b/lib/CodeGen/InterferenceCache.cpp @@ -0,0 +1,155 @@ +//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "InterferenceCache.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +void InterferenceCache::init(MachineFunction *mf, + LiveIntervalUnion *liuarray, + SlotIndexes *indexes, + const TargetRegisterInfo *tri) { + MF = mf; + LIUArray = liuarray; + TRI = tri; + PhysRegEntries.assign(TRI->getNumRegs(), 0); + for (unsigned i = 0; i != CacheEntries; ++i) + Entries[i].clear(mf, indexes); +} + +InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { + unsigned E = PhysRegEntries[PhysReg]; + if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { + if (!Entries[E].valid(LIUArray, TRI)) + Entries[E].revalidate(); + return &Entries[E]; + } + // No valid entry exists, pick the next round-robin entry. + E = RoundRobin; + if (++RoundRobin == CacheEntries) + RoundRobin = 0; + Entries[E].reset(PhysReg, LIUArray, TRI, MF); + PhysRegEntries[PhysReg] = E; + return &Entries[E]; +} + +/// revalidate - LIU contents have changed, update tags. +void InterferenceCache::Entry::revalidate() { + // Invalidate all block entries. + ++Tag; + // Invalidate all iterators. + PrevPos = SlotIndex(); + for (unsigned i = 0, e = Aliases.size(); i != e; ++i) + Aliases[i].second = Aliases[i].first->getTag(); +} + +void InterferenceCache::Entry::reset(unsigned physReg, + LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, + const MachineFunction *MF) { + // LIU's changed, invalidate cache. 
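+ // Bumping Tag lazily invalidates every cached BlockInterference entry.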
+ ++Tag; + PhysReg = physReg; + Blocks.resize(MF->getNumBlockIDs()); + Aliases.clear(); + for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { + LiveIntervalUnion *LIU = LIUArray + *AS; + Aliases.push_back(std::make_pair(LIU, LIU->getTag())); + } + + // Reset iterators. + PrevPos = SlotIndex(); + unsigned e = Aliases.size(); + Iters.resize(e); + for (unsigned i = 0; i != e; ++i) + Iters[i].setMap(Aliases[i].first->getMap()); +} + +bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { + unsigned i = 0, e = Aliases.size(); + for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { + LiveIntervalUnion *LIU = LIUArray + *AS; + if (i == e || Aliases[i].first != LIU) + return false; + if (LIU->changedSince(Aliases[i].second)) + return false; + } + return i == e; +} + +void InterferenceCache::Entry::update(unsigned MBBNum) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + + // Use advanceTo only when possible. + if (PrevPos != Start) { + if (!PrevPos.isValid() || Start < PrevPos) + for (unsigned i = 0, e = Iters.size(); i != e; ++i) + Iters[i].find(Start); + else + for (unsigned i = 0, e = Iters.size(); i != e; ++i) + Iters[i].advanceTo(Start); + PrevPos = Start; + } + + MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + BlockInterference *BI = &Blocks[MBBNum]; + for (;;) { + BI->Tag = Tag; + BI->First = BI->Last = SlotIndex(); + + // Check for first interference. + for (unsigned i = 0, e = Iters.size(); i != e; ++i) { + Iter &I = Iters[i]; + if (!I.valid()) + continue; + SlotIndex StartI = I.start(); + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + + PrevPos = Stop; + if (BI->First.isValid()) + break; + + // No interference in this block? Go ahead and precompute the next block. + if (++MFI == MF->end()) + return; + MBBNum = MFI->getNumber(); + BI = &Blocks[MBBNum]; + if (BI->Tag == Tag) + return; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + } + + // Check for last interference in block. + for (unsigned i = 0, e = Iters.size(); i != e; ++i) { + Iter &I = Iters[i]; + if (!I.valid() || I.start() >= Stop) + continue; + I.advanceTo(Stop); + bool Backup = !I.valid() || I.start() >= Stop; + if (Backup) + --I; + SlotIndex StopI = I.stop(); + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } +} diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h new file mode 100644 index 0000000..6c36fa4 --- /dev/null +++ b/lib/CodeGen/InterferenceCache.h @@ -0,0 +1,163 @@ +//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_INTERFERENCECACHE +#define LLVM_CODEGEN_INTERFERENCECACHE + +#include "LiveIntervalUnion.h" + +namespace llvm { + +class InterferenceCache { + const TargetRegisterInfo *TRI; + LiveIntervalUnion *LIUArray; + SlotIndexes *Indexes; + MachineFunction *MF; + + /// BlockInterference - information about the interference in a single basic + /// block. 
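+ /// An invalid First index means the block has no interference.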
+ struct BlockInterference { + BlockInterference() : Tag(0) {} + unsigned Tag; + SlotIndex First; + SlotIndex Last; + }; + + /// Entry - A cache entry containing interference information for all aliases + /// of PhysReg in all basic blocks. + class Entry { + /// PhysReg - The register currently represented. + unsigned PhysReg; + + /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions + /// change. + unsigned Tag; + + /// MF - The current function. + MachineFunction *MF; + + /// Indexes - Mapping block numbers to SlotIndex ranges. + SlotIndexes *Indexes; + + /// PrevPos - The previous position the iterators were moved to. + SlotIndex PrevPos; + + /// AliasTags - A LiveIntervalUnion pointer and tag for each alias of + /// PhysReg. + SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases; + + typedef LiveIntervalUnion::SegmentIter Iter; + + /// Iters - An iterator for each alias. + SmallVector<Iter, 8> Iters; + + /// Blocks - Interference for each block in the function. + SmallVector<BlockInterference, 8> Blocks; + + /// update - Recompute Blocks[MBBNum]. + void update(unsigned MBBNum); + + public: + Entry() : PhysReg(0), Tag(0), Indexes(0) {} + + void clear(MachineFunction *mf, SlotIndexes *indexes) { + PhysReg = 0; + MF = mf; + Indexes = indexes; + } + + unsigned getPhysReg() const { return PhysReg; } + + void revalidate(); + + /// valid - Return true if this is a valid entry for physReg. + bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); + + /// reset - Initialize entry to represent physReg's aliases. + void reset(unsigned physReg, + LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, + const MachineFunction *MF); + + /// get - Return an up-to-date BlockInterference. + BlockInterference *get(unsigned MBBNum) { + if (Blocks[MBBNum].Tag != Tag) + update(MBBNum); + return &Blocks[MBBNum]; + } + }; + + // We don't keep a cache entry for every physical register; that would use too + // much memory. Instead, a fixed number of cache entries are used in a round- + // robin manner. + enum { CacheEntries = 32 }; + + // Point to an entry for each physreg. The entry pointed to may not be up to + // date, and it may have been reused for a different physreg. + SmallVector<unsigned char, 2> PhysRegEntries; + + // Next round-robin entry to be picked. + unsigned RoundRobin; + + // The actual cache entries. + Entry Entries[CacheEntries]; + + // get - Get a valid entry for PhysReg. + Entry *get(unsigned PhysReg); + +public: + InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {} + + /// init - Prepare cache for a new function. + void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, + const TargetRegisterInfo *); + + /// Cursor - The primary query interface for the block interference cache. + class Cursor { + Entry *CacheEntry; + BlockInterference *Current; + public: + /// Cursor - Create a cursor for the interference allocated to PhysReg and + /// all its aliases. + Cursor(InterferenceCache &Cache, unsigned PhysReg) + : CacheEntry(Cache.get(PhysReg)), Current(0) {} + + /// moveToBlock - Move cursor to basic block MBBNum. + void moveToBlock(unsigned MBBNum) { + Current = CacheEntry->get(MBBNum); + } + + /// hasInterference - Return true if the current block has any interference. + bool hasInterference() { + return Current->First.isValid(); + } + + /// first - Return the starting index of the first interfering range in the + /// current block.
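+ /// Only meaningful after a moveToBlock() call.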
+ SlotIndex first() { + return Current->First; + } + + /// last - Return the ending index of the last interfering range in the + /// current block. + SlotIndex last() { + return Current->Last; + } + }; + + friend class Cursor; +}; + +} // namespace llvm + +#endif diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 80dfc76..e1dad2e 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -98,12 +98,6 @@ static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -// Enable or disable an experimental optimization to split GEPs -// and run a special GVN pass which does not examine loads, in -// an effort to factor out redundancy implicit in complex GEPs. -static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden, - cl::desc("Split GEPs and run no-load GVN")); - LLVMTargetMachine::LLVMTargetMachine(const Target &T, const std::string &Triple) : TargetMachine(T), TargetTriple(Triple) { @@ -132,6 +126,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return true; assert(Context != 0 && "Failed to get MCContext"); + if (hasMCSaveTempLabels()) + Context->setAllowTemporaryLabels(false); + const MCAsmInfo &MAI = *getMCAsmInfo(); OwningPtr<MCStreamer> AsmStreamer; @@ -139,7 +136,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); + getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; @@ -152,6 +149,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), + hasMCUseCFI(), InstPrinter, MCE, TAB, ShowMCInst); @@ -230,11 +228,40 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_ostream &Out, CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) return true; + + if (hasMCSaveTempLabels()) + Ctx->setAllowTemporaryLabels(false); + + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx); + TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + if (MCE == 0 || TAB == 0) + return true; + + OwningPtr<MCStreamer> AsmStreamer; + AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx, + *TAB, Out, MCE, + hasMCRelaxAll(), + hasMCNoExecStack())); + AsmStreamer.get()->InitSections(); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + if (Printer == 0) + return true; + + // If successful, createAsmPrinter took ownership of AsmStreamer. + AsmStreamer.take(); + + PM.add(Printer); + // Make sure the code model is set. setCodeModelForJIT(); @@ -272,12 +299,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (!DisableVerify) PM.add(createVerifierPass()); - // Optionally, tun split-GEPs and no-load GVN. 
- if (EnableSplitGEPGVN) { - PM.add(createGEPSplitterPass()); - PM.add(createGVNPass(/*NoLoads=*/true)); - } - // Run loop strength reduction before anything else. if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -304,6 +325,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::DwarfTable: + case ExceptionHandling::ARM: PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 853ec1a..8b21483 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -76,6 +77,7 @@ typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; /// held by the same virtual register. The equivalence class is the transitive /// closure of that relation. namespace { +class LDVImpl; class UserValue { const MDNode *variable; ///< The debug info variable we are part of. unsigned offset; ///< Byte offset into variable. @@ -99,10 +101,6 @@ class UserValue { void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, const TargetInstrInfo &TII); - /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx. - void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, - LiveIntervals &LIS, const TargetInstrInfo &TII); - public: /// UserValue - Create a new UserValue. UserValue(const MDNode *var, unsigned o, DebugLoc L, @@ -146,17 +144,31 @@ public: /// getLocationNo - Return the location number that matches Loc. unsigned getLocationNo(const MachineOperand &LocMO) { - if (LocMO.isReg() && LocMO.getReg() == 0) - return ~0u; - for (unsigned i = 0, e = locations.size(); i != e; ++i) - if (LocMO.isIdenticalTo(locations[i])) - return i; + if (LocMO.isReg()) { + if (LocMO.getReg() == 0) + return ~0u; + // For register locations we don't care about use/def and other flags. + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (locations[i].isReg() && + locations[i].getReg() == LocMO.getReg() && + locations[i].getSubReg() == LocMO.getSubReg()) + return i; + } else + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (LocMO.isIdenticalTo(locations[i])) + return i; locations.push_back(LocMO); // We are storing a MachineOperand outside a MachineInstr. locations.back().clearParent(); + // Don't store def operands. + if (locations.back().isReg()) + locations.back().setIsUse(); return locations.size() - 1; } + /// mapVirtRegs - Ensure that all virtual register locations are mapped. + void mapVirtRegs(LDVImpl *LDV); + /// addDef - Add a definition point to this value. void addDef(SlotIndex Idx, const MachineOperand &LocMO) { // Add a singular (Idx,Idx) -> Loc mapping. @@ -168,19 +180,36 @@ public: /// extendDef - Extend the current definition as far as possible down the /// dominator tree. Stop when meeting an existing def or when leaving the live /// range of VNI. + /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. /// @param LocNo Location number to propagate. /// @param LI Restrict liveness to where LI has the value VNI. May be null.
/// @param VNI When LI is not null, this is the value to restrict to. + /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. /// @param MDT Dominator tree. void extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, + SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT); + /// addDefsFromCopies - The value in LI/LocNo may be copied to other + /// registers. Determine if any of the copies are available at the kill + /// points, and add defs if possible. + /// @param LI Scan for copies of the value in LI->reg. + /// @param LocNo Location number of LI->reg. + /// @param Kills Points where the range of LocNo could be extended. + /// @param NewDefs Append (Idx, LocNo) of inserted defs here. + void addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, + MachineRegisterInfo &MRI, + LiveIntervals &LIS); + /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. - void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT); + void computeIntervals(MachineRegisterInfo &MRI, + LiveIntervals &LIS, MachineDominatorTree &MDT); /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, @@ -230,9 +259,6 @@ class LDVImpl { /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); - /// mapVirtReg - Map virtual register to an equivalence class. - void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// handleDebugValue - Add DBG_VALUE instruction to our maps. /// @param MI DBG_VALUE instruction /// @param Idx Last valid SlotIndex before instruction. @@ -261,7 +287,10 @@ public: userVarMap.clear(); } - /// renameRegister - Replace all references to OldReg wiht NewReg:SubIdx. + /// mapVirtReg - Map virtual register to an equivalence class. + void mapVirtReg(unsigned VirtReg, UserValue *EC); + + /// renameRegister - Replace all references to OldReg with NewReg:SubIdx. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); /// emitDebugVariables - Recreate DBG_VALUE instructions from data structures. @@ -322,6 +351,13 @@ void UserValue::coalesceLocation(unsigned LocNo) { } } +void UserValue::mapVirtRegs(LDVImpl *LDV) { + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (locations[i].isReg() && + TargetRegisterInfo::isVirtualRegister(locations[i].getReg())) + LDV->mapVirtReg(locations[i].getReg(), this); +} + UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; @@ -363,14 +399,6 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { unsigned Offset = MI->getOperand(1).getImm(); const MDNode *Var = MI->getOperand(2).getMetadata(); UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); - - // If the location is a virtual register, make sure it is mapped.
- if (MI->getOperand(0).isReg()) { - unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - mapVirtReg(Reg, UV); - } - UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -405,6 +433,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, + SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT) { SmallVector<SlotIndex, 16> Todo; Todo.push_back(Idx); @@ -419,8 +448,11 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, bool ToEnd = true; if (LI && VNI) { LiveRange *Range = LI->getLiveRangeContaining(Start); - if (!Range || Range->valno != VNI) + if (!Range || Range->valno != VNI) { + if (Kills) + Kills->push_back(Start); continue; + } if (Range->end < Stop) Stop = Range->end, ToEnd = false; } @@ -438,6 +470,9 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, // Limited by the next def. if (I.valid() && I.start() < Stop) Stop = I.start(), ToEnd = false; + // Limited by VNI's live range. + else if (!ToEnd && Kills) + Kills->push_back(Stop); if (Start >= Stop) continue; @@ -455,7 +490,82 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, } void -UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { +UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS) { + if (Kills.empty()) + return; + // Don't track copies from physregs, there are too many uses. + if (!TargetRegisterInfo::isVirtualRegister(LI->reg)) + return; + + // Collect all the (vreg, valno) pairs that are copies of LI. + SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(LI->reg), + UE = MRI.use_nodbg_end(); UI != UE; ++UI) { + // Copies of the full value. + if (UI.getOperand().getSubReg() || !UI->isCopy()) + continue; + MachineInstr *MI = &*UI; + unsigned DstReg = MI->getOperand(0).getReg(); + + // Don't follow copies to physregs. These are usually setting up call + // arguments, and the argument registers are always call clobbered. We are + // better off in the source register which could be a callee-saved register, + // or it could be spilled. + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + continue; + + // Is LocNo extended to reach this copy? If not, another def may be blocking + // it, or we are looking at a wrong value of LI. + SlotIndex Idx = LIS.getInstructionIndex(MI); + LocMap::iterator I = locInts.find(Idx.getUseIndex()); + if (!I.valid() || I.value() != LocNo) + continue; + + if (!LIS.hasInterval(DstReg)) + continue; + LiveInterval *DstLI = &LIS.getInterval(DstReg); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex()); + assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value"); + CopyValues.push_back(std::make_pair(DstLI, DstVNI)); + } + + if (CopyValues.empty()) + return; + + DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n'); + + // Try to add defs of the copied values for each kill point. 
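+ // A copy value that is live at the kill point, and not already shadowed by another def, lets the location follow the copied register.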
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + SlotIndex Idx = Kills[i]; + for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) { + LiveInterval *DstLI = CopyValues[j].first; + const VNInfo *DstVNI = CopyValues[j].second; + if (DstLI->getVNInfoAt(Idx) != DstVNI) + continue; + // Check that there isn't already a def at Idx + LocMap::iterator I = locInts.find(Idx); + if (I.valid() && I.start() <= Idx) + continue; + DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" + << DstVNI->id << " in " << *DstLI << '\n'); + MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); + assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); + unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); + I.insert(Idx, Idx.getNextSlot(), LocNo); + NewDefs.push_back(std::make_pair(Idx, LocNo)); + break; + } + } +} + +void +UserValue::computeIntervals(MachineRegisterInfo &MRI, + LiveIntervals &LIS, + MachineDominatorTree &MDT) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -463,7 +573,8 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { if (I.value() != ~0u) Defs.push_back(std::make_pair(I.start(), I.value())); - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + // Extend all defs, and possibly add new ones along the way. + for (unsigned i = 0; i != Defs.size(); ++i) { SlotIndex Idx = Defs[i].first; unsigned LocNo = Defs[i].second; const MachineOperand &Loc = locations[LocNo]; @@ -472,9 +583,11 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) { LiveInterval *LI = &LIS.getInterval(Loc.getReg()); const VNInfo *VNI = LI->getVNInfoAt(Idx); - extendDef(Idx, LocNo, LI, VNI, LIS, MDT); + SmallVector<SlotIndex, 16> Kills; + extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT); + addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); } else - extendDef(Idx, LocNo, 0, 0, LIS, MDT); + extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT); } // Finally, erase all the undefs. @@ -486,8 +599,10 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { } void LDVImpl::computeIntervals() { - for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->computeIntervals(*LIS, *MDT); + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT); + userValues[i]->mapVirtRegs(this); + } } bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { @@ -640,13 +755,6 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, .addOperand(Loc).addImm(offset).addMetadata(variable); } -void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, - LiveIntervals &LIS, const TargetInstrInfo &TII) { - MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0) - .addImm(offset).addMetadata(variable); -} - void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TII) { MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); @@ -678,12 +786,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, break; ++I; - if (Stop == MBBEnd) - continue; - // The current interval ends before MBB. - // Insert a kill if there is a gap. 
- if (!I.valid() || I.start() > Stop) - insertDebugKill(MBB, Stop, LIS, TII); } } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index c2dbd6a..cfade24 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -30,19 +30,22 @@ #include <algorithm> using namespace llvm; -// CompEnd - Compare LiveRange ends. -namespace { -struct CompEnd { - bool operator()(const LiveRange &A, const LiveRange &B) const { - return A.end < B.end; - } -}; -} - LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { - assert(Pos.isValid() && "Cannot search for an invalid index"); - return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0), - CompEnd()); + // This algorithm is basically std::upper_bound. + // Unfortunately, std::upper_bound cannot be used with mixed types until we + // adopt C++0x. Many libraries can do it, but not all. + if (empty() || Pos >= endIndex()) + return end(); + iterator I = begin(); + size_t Len = ranges.size(); + do { + size_t Mid = Len >> 1; + if (Pos < I[Mid].end) + Len = Mid; + else + I += Mid + 1, Len -= Mid + 1; + } while (Len); + return I; } /// killedInRange - Return true if the interval has kills in [Start,End). @@ -291,6 +294,22 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } +/// extendInBlock - If this interval is live before UseIdx in the basic +/// block that starts at StartIdx, extend it to be live at UseIdx and return +/// the value. If there is no live range before UseIdx, return NULL. +VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) { + if (empty()) + return 0; + iterator I = std::upper_bound(begin(), end(), UseIdx); + if (I == begin()) + return 0; + --I; + if (I->end <= StartIdx) + return 0; + if (I->end <= UseIdx) + extendIntervalEndTo(I, UseIdx.getNextSlot()); + return I->valno; +} /// removeRange - Remove the specified range from this interval. Note that /// the range must be in a single LiveRange in its entirety. @@ -476,60 +495,19 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, void LiveInterval::MergeValueInAsValue( const LiveInterval &RHS, const VNInfo *RHSValNo, VNInfo *LHSValNo) { - SmallVector<VNInfo*, 4> ReplacedValNos; - iterator IP = begin(); + // TODO: Make this more efficient. + iterator InsertPos = begin(); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo"); if (I->valno != RHSValNo) continue; - SlotIndex Start = I->start, End = I->end; - IP = std::upper_bound(IP, end(), Start); - // If the start of this range overlaps with an existing liverange, trim it. - if (IP != begin() && IP[-1].end > Start) { - if (IP[-1].valno != LHSValNo) { - ReplacedValNos.push_back(IP[-1].valno); - IP[-1].valno = LHSValNo; // Update val#. - } - Start = IP[-1].end; - // Trimmed away the whole range? - if (Start >= End) continue; - } - // If the end of this range overlaps with an existing liverange, trim it. - if (IP != end() && End > IP->start) { - if (IP->valno != LHSValNo) { - ReplacedValNos.push_back(IP->valno); - IP->valno = LHSValNo; // Update val#. - } - End = IP->start; - // If this trimmed away the whole range, ignore it. - if (Start == End) continue; - } - // Map the valno in the other live range to the current live range. 
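The hand-rolled loop in the new LiveInterval::find above exists only because, as its comment says, std::upper_bound cannot portably compare a bare index against range elements before C++0x. The same length-halving search, reduced to a standalone sketch with stand-in types (Range here is not the LLVM LiveRange):

    #include <cstddef>
    #include <vector>

    struct Range { int start, end; };     // stand-in for a live range

    // Return the index of the first range whose 'end' is greater than Pos,
    // or Ranges.size() if there is none; same control flow as find() above.
    std::size_t upperBoundByEnd(const std::vector<Range> &Ranges, int Pos) {
      std::size_t I = 0, Len = Ranges.size();
      while (Len) {
        std::size_t Mid = Len >> 1;
        if (Pos < Ranges[I + Mid].end)
          Len = Mid;                      // the answer is in the left half
        else {
          I += Mid + 1;                   // skip the left half and Mid itself
          Len -= Mid + 1;
        }
      }
      return I;
    }

The invariant is that every range before position I has end <= Pos and every range at or after I + Len has end > Pos, so the loop needs no separate bounds checks.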
- IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP); - } - - - SmallSet<VNInfo*, 4> Seen; - for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) { - VNInfo *V1 = ReplacedValNos[i]; - if (Seen.insert(V1)) { - bool isDead = true; - for (const_iterator I = begin(), E = end(); I != E; ++I) - if (I->valno == V1) { - isDead = false; - break; - } - if (isDead) { - // Now that V1 is dead, remove it. - markValNoForDeletion(V1); - } - } + LiveRange Tmp = *I; + Tmp.valno = LHSValNo; + InsertPos = addRangeFrom(Tmp, InsertPos); } } - /// MergeValueNumberInto - This method is called when two value numbers /// are found to be equivalent. This eliminates V1, replacing all /// LiveRanges that carry the V1 value number with the V2 value number. This can @@ -700,8 +678,8 @@ void LiveRange::print(raw_ostream &os) const { unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Create initial equivalence classes. - eqClass_.clear(); - eqClass_.grow(LI->getNumValNums()); + EqClass.clear(); + EqClass.grow(LI->getNumValNums()); const VNInfo *used = 0, *unused = 0; @@ -712,48 +690,65 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Group all unused values into one class. if (VNI->isUnused()) { if (unused) - eqClass_.join(unused->id, VNI->id); + EqClass.join(unused->id, VNI->id); unused = VNI; continue; } used = VNI; if (VNI->isPHIDef()) { - const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def); + const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); assert(MBB && "Phi-def has no defining MBB"); // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) if (const VNInfo *PVNI = - LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot())) - eqClass_.join(VNI->id, PVNI->id); + LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot())) + EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) - eqClass_.join(VNI->id, UVNI->id); + EqClass.join(VNI->id, UVNI->id); } } // Lump all the unused values in with the last used value. if (used && unused) - eqClass_.join(used->id, unused->id); + EqClass.join(used->id, unused->id); - eqClass_.compress(); - return eqClass_.getNumClasses(); + EqClass.compress(); + return EqClass.getNumClasses(); } -void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { +void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], + MachineRegisterInfo &MRI) { assert(LIV[0] && "LIV[0] must be set"); LiveInterval &LI = *LIV[0]; - // First move runs to new intervals. + // Rewrite instructions. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), + RE = MRI.reg_end(); RI != RE;) { + MachineOperand &MO = RI.getOperand(); + MachineInstr *MI = MO.getParent(); + ++RI; + if (MO.isUse() && MO.isUndef()) + continue; + // DBG_VALUE instructions should have been eliminated earlier. + SlotIndex Idx = LIS.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + const VNInfo *VNI = LI.getVNInfoAt(Idx); + assert(VNI && "Interval not live at use."); + MO.setReg(LIV[getEqClass(VNI)]->reg); + } + + // Move runs to new intervals.
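Classify above drives an integer union-find through join and then compress: join merges the classes of two value numbers, and compress renumbers the surviving leaders densely so Distribute can index LIV[] by class. A minimal standalone version of such a structure (a simplified sketch, not the LLVM implementation):

    #include <vector>

    class EqClassesSketch {
      std::vector<unsigned> Parent;          // Parent[i] == i for class leaders
    public:
      explicit EqClassesSketch(unsigned N) : Parent(N) {
        for (unsigned I = 0; I != N; ++I) Parent[I] = I;
      }
      unsigned find(unsigned I) {            // path-halving find
        while (Parent[I] != I)
          I = Parent[I] = Parent[Parent[I]];
        return I;
      }
      void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
      // Renumber leaders densely as 0..K-1 and return K; afterwards
      // operator[] yields the final class id, as EqClass[] does above.
      unsigned compress() {
        std::vector<unsigned> Leader(Parent.size());
        for (unsigned I = 0; I != Parent.size(); ++I)
          Leader[I] = find(I);
        unsigned K = 0;
        std::vector<unsigned> Dense(Parent.size(), ~0u);
        for (unsigned I = 0; I != Parent.size(); ++I) {
          if (Dense[Leader[I]] == ~0u)
            Dense[Leader[I]] = K++;
          Parent[I] = Dense[Leader[I]];
        }
        return K;
      }
      unsigned operator[](unsigned I) const { return Parent[I]; }
    };

After compress the structure is frozen: operator[] returns final class ids and join must not be called again, which matches how Classify and Distribute use it.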
LiveInterval::iterator J = LI.begin(), E = LI.end(); - while (J != E && eqClass_[J->valno->id] == 0) + while (J != E && EqClass[J->valno->id] == 0) ++J; for (LiveInterval::iterator I = J; I != E; ++I) { - if (unsigned eq = eqClass_[I->valno->id]) { + if (unsigned eq = EqClass[I->valno->id]) { assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && "New intervals should be empty"); LIV[eq]->ranges.push_back(*I); @@ -764,11 +759,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { // Transfer VNInfos to their new owners and renumber them. unsigned j = 0, e = LI.getNumValNums(); - while (j != e && eqClass_[j] == 0) + while (j != e && EqClass[j] == 0) ++j; for (unsigned i = j; i != e; ++i) { VNInfo *VNI = LI.getValNumInfo(i); - if (unsigned eq = eqClass_[i]) { + if (unsigned eq = EqClass[i]) { VNI->id = LIV[eq]->getNumValNums(); LIV[eq]->valnos.push_back(VNI); } else { diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index aef5b5f..9257191 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -572,19 +572,12 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else if (allocatableRegs_[MO.getReg()]) { + else { MachineInstr *CopyMI = NULL; if (MI->isCopyLike()) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(MO.getReg()), CopyMI); - // Def of a register also defines its sub-registers. - for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) - // If MI also modifies the sub-register explicitly, avoid processing it - // more than once. Do not pass in TRI here so it checks for exact match. - if (!MI->definesRegister(*AS)) - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(*AS), 0); } } @@ -645,7 +638,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, end = MIIdx.getStoreIndex(); } else { DEBUG(dbgs() << " live through"); - end = baseIndex; + end = getMBBEndIdx(MBB); } } @@ -746,7 +739,8 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { /// shrinkToUses - After removing some uses of a register, shrink its live /// range to just the remaining uses. This method does not compute reaching /// defs for new uses, and it doesn't remove dead defs. -void LiveIntervals::shrinkToUses(LiveInterval *li) { +bool LiveIntervals::shrinkToUses(LiveInterval *li, + SmallVectorImpl<MachineInstr*> *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) && "Can only shrink virtual registers"); @@ -760,7 +754,15 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { continue; SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); VNInfo *VNI = li->getVNInfoAt(Idx); - assert(VNI && "Live interval not live into reading instruction"); + if (!VNI) { + // This shouldn't happen: readsVirtualRegister returns true, but there is + // no live value. It is likely caused by a target getting <undef> flags + // wrong. + DEBUG(dbgs() << Idx << '\t' << *UseMI + << "Warning: Instr claims to read non-existent value in " + << *li << '\n'); + continue; + } if (VNI->def == Idx) { // Special case: An early-clobber tied operand reads and writes the // register one slot early.
@@ -778,49 +780,47 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; + // We may eliminate PHI values, so recompute PHIKill flags. + VNI->setHasPHIKill(false); NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); + + // A use tied to an early-clobber def ends at the load slot and isn't caught + // above. Catch it here instead. This probably only ever happens for inline + // assembly. + if (VNI->def.isUse()) + if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex())) + WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI)); } + // Keep track of the PHIs that are in use. + SmallPtrSet<VNInfo*, 8> UsedPHIs; + // Extend intervals to reach all uses in WorkList. while (!WorkList.empty()) { SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - - // Extend the live range for VNI to be live at Idx. - LiveInterval::iterator I = NewLI.find(Idx); - - // Already got it? - if (I != NewLI.end() && I->start <= Idx) { - assert(I->valno == VNI && "Unexpected existing value number"); - continue; - } - - // Is there already a live range in the block containing Idx? const MachineBasicBlock *MBB = getMBBFromIndex(Idx); SlotIndex BlockStart = getMBBStartIdx(MBB); - DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx - << " in BB#" << MBB->getNumber() << '@' << BlockStart); - if (I != NewLI.begin() && (--I)->end > BlockStart) { - assert(I->valno == VNI && "Wrong reaching def"); - DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n"); - // Is this the first use of a PHIDef in its defining block? - if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) { - // The PHI is live, make sure the predecessors are live-out. - for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - VNInfo *PVNI = li->getVNInfoAt(Stop); - // A predecessor is not required to have a live-out value for a PHI. - if (PVNI) { - assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag"); - WorkList.push_back(std::make_pair(Stop, PVNI)); - } + + // Extend the live range for VNI to be live at Idx. + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + (void)ExtVNI; + assert(ExtVNI == VNI && "Unexpected existing value number"); + // Is this a PHIDef we haven't seen before? + if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI)) + continue; + // The PHI is live, make sure the predecessors are live-out. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + VNInfo *PVNI = li->getVNInfoAt(Stop); + // A predecessor is not required to have a live-out value for a PHI. + if (PVNI) { + PVNI->setHasPHIKill(true); + WorkList.push_back(std::make_pair(Stop, PVNI)); } } - - // Extend the live range in the block to include Idx. - NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI)); continue; } @@ -838,6 +838,7 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { } // Handle dead values. 
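The rewritten loop above is a standard backward-liveness worklist: extend the range inside the current block, and when the value turns out to enter at the block start (a PHI def), enqueue every predecessor's live-out point exactly once, with UsedPHIs playing the visited-set role. The shape of the algorithm with SlotIndexes stripped away; block ids and the def/use flags are toy stand-ins, and intra-block ordering is ignored:

    #include <set>
    #include <vector>

    struct Block { std::vector<int> preds; bool def; bool use; };

    // A value is live-in to a block if it is used there or live-out to a
    // successor; a defining block stops the backward propagation.
    std::set<int> computeLiveIn(const std::vector<Block> &Blocks) {
      std::set<int> LiveIn;
      std::vector<int> Work;
      for (int b = 0; b < (int)Blocks.size(); ++b)
        if (Blocks[b].use)
          Work.push_back(b);
      while (!Work.empty()) {
        int b = Work.back();
        Work.pop_back();
        if (Blocks[b].def)              // the def reaches; stop propagating
          continue;
        if (!LiveIn.insert(b).second)   // already visited, like UsedPHIs above
          continue;
        for (unsigned i = 0; i != Blocks[b].preds.size(); ++i)
          Work.push_back(Blocks[b].preds[i]);
      }
      return LiveIn;
    }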
+ bool CanSeparate = false; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; @@ -847,21 +848,28 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { assert(LII != NewLI.end() && "Missing live range for PHI"); if (LII->end != VNI->def.getNextSlot()) continue; - if (!VNI->isPHIDef()) { + if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->setIsUnused(true); NewLI.removeRange(*LII); + DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); + CanSeparate = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); assert(MI && "No instruction defining live value"); MI->addRegisterDead(li->reg, tri_); + if (dead && MI->allDefsAreDead()) { + DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); + dead->push_back(MI); + } } } // Move the trimmed ranges back. li->ranges.swap(NewLI.ranges); - DEBUG(dbgs() << "Shrink: " << *li << '\n'); + DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + return CanSeparate; } @@ -955,7 +963,7 @@ bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, bool LiveIntervals::isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, MachineInstr *MI, - const SmallVectorImpl<LiveInterval*> &SpillIs, + const SmallVectorImpl<LiveInterval*> *SpillIs, bool &isLoad) { if (DisableReMat) return false; @@ -982,9 +990,10 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, // If a register operand of the re-materialized instruction is going to // be spilled next, then it's not legal to re-materialize this instruction. - for (unsigned i = 0, e = SpillIs.size(); i != e; ++i) - if (ImpUse == SpillIs[i]->reg) - return false; + if (SpillIs) + for (unsigned i = 0, e = SpillIs->size(); i != e; ++i) + if (ImpUse == (*SpillIs)[i]->reg) + return false; } return true; } @@ -993,16 +1002,15 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, /// val# of the specified interval is re-materializable. bool LiveIntervals::isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, MachineInstr *MI) { - SmallVector<LiveInterval*, 4> Dummy1; bool Dummy2; - return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2); + return isReMaterializable(li, ValNo, MI, 0, Dummy2); } /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> &SpillIs, + const SmallVectorImpl<LiveInterval*> *SpillIs, bool &isLoad) { isLoad = false; for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); @@ -1499,7 +1507,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, // ... // def = ... // = use - // It's better to start a new interval to avoid artifically + // It's better to start a new interval to avoid artificially // extending the new interval. if (MI->readsWritesVirtualRegister(li.reg) == std::make_pair(false,true)) { @@ -1702,7 +1710,9 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // overflow a float. This expression behaves like 10^d for small d, but is // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of // headroom before overflow. + // By the way, powf() might be unavailable here, so for consistency + // we use pow(double, double).
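The claims in that comment are easy to check numerically; the factor tracks 10^d closely for shallow loops and stays far below float range even at depth 200. Plain arithmetic, not part of the patch:

    #include <cmath>
    #include <cstdio>

    int main() {
      // (1 + 100/(d+10))^d: ~1, 10, 87, 657 for d = 0..3, and ~6.7e33 at
      // d = 200, comfortably below FLT_MAX (~3.4e38).
      const int depths[] = { 0, 1, 2, 3, 10, 200 };
      for (unsigned i = 0; i != sizeof(depths) / sizeof(depths[0]); ++i) {
        int d = depths[i];
        double lc = std::pow(1 + (100.0 / (d + 10)), (double)d);
        std::printf("depth %3d -> %g\n", d, lc);
      }
      return 0;
    }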
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); return (isDef + isUse) * lc; } @@ -1715,7 +1725,7 @@ static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { std::vector<LiveInterval*> LiveIntervals:: addIntervalsForSpills(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> &SpillIs, + const SmallVectorImpl<LiveInterval*> *SpillIs, const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { assert(li.isSpillable() && "attempt to spill already spilled interval!"); diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 205f28a..b67f966 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -35,12 +35,20 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) { LiveInterval::iterator RegEnd = VirtReg.end(); SegmentIter SegPos = Segments.find(RegPos->start); - for (;;) { + while (SegPos.valid()) { SegPos.insert(RegPos->start, RegPos->end, &VirtReg); if (++RegPos == RegEnd) return; SegPos.advanceTo(RegPos->start); } + + // We have reached the end of Segments, so it is no longer necessary to search + // for the insertion position. + // It is faster to insert the end first. + --RegEnd; + SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg); + for (; RegPos != RegEnd; ++RegPos, ++SegPos) + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); } // Remove a live virtual register's segments from this union. @@ -168,6 +176,7 @@ LiveIntervalUnion::Query::firstInterference() { return FirstInterference; CheckedFirstInterference = true; InterferenceResult &IR = FirstInterference; + IR.LiveUnionI.setMap(LiveUnion->getMap()); // Quickly skip interference check for empty sets. if (VirtReg->empty() || LiveUnion->empty()) { @@ -176,10 +185,10 @@ LiveIntervalUnion::Query::firstInterference() { // VirtReg starts first, perform double binary search. IR.VirtRegI = VirtReg->find(LiveUnion->startIndex()); if (IR.VirtRegI != VirtReg->end()) - IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start); + IR.LiveUnionI.find(IR.VirtRegI->start); } else { // LiveUnion starts first, perform double binary search. - IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex()); + IR.LiveUnionI.find(VirtReg->beginIndex()); if (IR.LiveUnionI.valid()) IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start()); else @@ -235,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // // For comments on how to speed it up, see Query::findIntersection(). unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs) { +collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { InterferenceResult IR = firstInterference(); LiveInterval::iterator VirtRegEnd = VirtReg->end(); LiveInterval *RecentInterferingVReg = NULL; @@ -277,6 +286,11 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { // Cache the most recent interfering vreg to bypass isSeenInterference. RecentInterferingVReg = IR.LiveUnionI.value(); ++IR.LiveUnionI; + + // Stop collecting when the max weight is exceeded. + if (RecentInterferingVReg->weight >= MaxWeight) + return InterferingVRegs.size(); + continue; } // VirtRegI may have advanced far beyond LiveUnionI, diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index 6f9c5f4..c83578e 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -95,6 +95,9 @@ public: // Remove a live virtual register's segments from this union. 
void extract(LiveInterval &VirtReg); + // Remove all inserted virtual registers. + void clear() { Segments.clear(); ++Tag; } + // Print union, using TRI to translate register names void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; @@ -163,10 +166,10 @@ public: bool CheckedFirstInterference; bool SeenAllInterferences; bool SeenUnspillableVReg; - unsigned Tag; + unsigned Tag, UserTag; public: - Query(): LiveUnion(), VirtReg() {} + Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {} Query(LiveInterval *VReg, LiveIntervalUnion *LIU): LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false), @@ -181,11 +184,13 @@ public: SeenAllInterferences = false; SeenUnspillableVReg = false; Tag = 0; + UserTag = 0; } - void init(LiveInterval *VReg, LiveIntervalUnion *LIU) { + void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) { assert(VReg && LIU && "Invalid arguments"); - if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) { + if (UserTag == UTag && VirtReg == VReg && + LiveUnion == LIU && !LIU->changedSince(Tag)) { // Retain cached results, e.g. firstInterference. return; } @@ -193,6 +198,7 @@ public: LiveUnion = LIU; VirtReg = VReg; Tag = LIU->getTag(); + UserTag = UTag; } LiveInterval &virtReg() const { @@ -223,7 +229,8 @@ public: // Count the virtual registers in this union that interfere with this // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX, + float MaxWeight = HUGE_VALF); // Was this virtual register visited during collectInterferingVRegs? bool isSeenInterference(LiveInterval *VReg) const; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 3bbda1c..f8a3dbb 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -11,24 +11,41 @@ // is spilled or split. 
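The Tag/UserTag pair added to Query above is a generation-counter cache: the union bumps Tag on every mutation (note that clear() does ++Tag), and the caller-supplied UserTag lets the allocator invalidate all cached queries at once between rounds. The pattern in isolation; all names here are hypothetical:

    struct Data {
      unsigned Tag;                 // bumped on every mutation
      int Value;
      Data() : Tag(0), Value(0) {}
      void mutate(int V) { Value = V; ++Tag; }
    };

    struct CachedQuery {
      unsigned Tag, UserTag;        // generations the result was computed at
      bool Valid;
      int Result;
      CachedQuery() : Tag(0), UserTag(0), Valid(false), Result(0) {}

      // Recompute only when either generation moved, mirroring Query::init.
      int get(const Data &D, unsigned UTag) {
        if (Valid && Tag == D.Tag && UserTag == UTag)
          return Result;            // cache hit: nothing has changed
        Result = D.Value * 2;       // stand-in for the expensive computation
        Tag = D.Tag;
        UserTag = UTag;
        Valid = true;
        return Result;
      }
    };

Compared with back-pointers and invalidation callbacks, a tag comparison costs two integer loads and can never dangle.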
//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "regalloc" #include "LiveRangeEdit.h" #include "VirtRegMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; -LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri, - LiveIntervals &lis, - VirtRegMap &vrm) { - const TargetRegisterClass *RC = mri.getRegClass(getReg()); - unsigned VReg = mri.createVirtualRegister(RC); - vrm.grow(); - vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg())); - LiveInterval &li = lis.getOrCreateInterval(VReg); - newRegs_.push_back(&li); - return li; +LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, + LiveIntervals &LIS, + VirtRegMap &VRM) { + MachineRegisterInfo &MRI = VRM.getRegInfo(); + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + VRM.grow(); + VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg)); + LiveInterval &LI = LIS.getOrCreateInterval(VReg); + newRegs_.push_back(&LI); + return LI; +} + +bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, + const MachineInstr *DefMI, + const TargetInstrInfo &tii, + AliasAnalysis *aa) { + assert(DefMI && "Missing instruction"); + scannedRemattable_ = true; + if (!tii.isTriviallyReMaterializable(DefMI, aa)) + return false; + remattable_.insert(VNI); + return true; } void LiveRangeEdit::scanRemattable(LiveIntervals &lis, @@ -42,8 +59,7 @@ void LiveRangeEdit::scanRemattable(LiveIntervals &lis, MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - if (tii.isTriviallyReMaterializable(DefMI, aa)) - remattable_.insert(VNI); + checkRematerializable(VNI, DefMI, tii, aa); } scannedRemattable_ = true; } @@ -66,18 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, UseIdx = UseIdx.getUseIndex(); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); - if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg()) + if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; // Reserved registers are OK. if (MO.isUndef() || !lis.hasInterval(MO.getReg())) continue; - // We don't want to move any defs. - if (MO.isDef()) - return false; // We cannot depend on virtual registers in uselessRegs_. - for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui) - if (uselessRegs_[ui]->reg == MO.getReg()) - return false; + if (uselessRegs_) + for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui) + if ((*uselessRegs_)[ui]->reg == MO.getReg()) + return false; LiveInterval &li = lis.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); @@ -99,16 +113,22 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, if (!remattable_.count(RM.ParentVNI)) return false; - // No defining instruction. - RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def); - assert(RM.OrigMI && "Defining instruction for remattable value disappeared"); + // No defining instruction provided. + SlotIndex DefIdx; + if (RM.OrigMI) + DefIdx = lis.getInstructionIndex(RM.OrigMI); + else { + DefIdx = RM.ParentVNI->def; + RM.OrigMI = lis.getInstructionFromIndex(DefIdx); + assert(RM.OrigMI && "No defining instruction for remattable value"); + } // If only cheap remats were requested, bail out early. 
if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) return false; // Verify that all used registers are available with the same values. - if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis)) + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis)) return false; return true; @@ -120,10 +140,174 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, const Remat &RM, LiveIntervals &lis, const TargetInstrInfo &tii, - const TargetRegisterInfo &tri) { + const TargetRegisterInfo &tri, + bool Late) { assert(RM.OrigMI && "Invalid remat"); tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.InsertMachineInstrInMaps(--MI).getDefIndex(); + return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getDefIndex(); +} + +void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { + if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) + LIS.removeInterval(Reg); +} + +bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, + SmallVectorImpl<MachineInstr*> &Dead, + MachineRegisterInfo &MRI, + LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineInstr *DefMI = 0, *UseMI = 0; + + // Check that there is a single def and a single use. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + MachineInstr *MI = MO.getParent(); + if (MO.isDef()) { + if (DefMI && DefMI != MI) + return false; + if (!MI->getDesc().canFoldAsLoad()) + return false; + DefMI = MI; + } else if (!MO.isUndef()) { + if (UseMI && UseMI != MI) + return false; + // FIXME: Targets don't know how to fold subreg uses. + if (MO.getSubReg()) + return false; + UseMI = MI; + } + } + if (!DefMI || !UseMI) + return false; + + DEBUG(dbgs() << "Try to fold single def: " << *DefMI + << " into single use: " << *UseMI); + + SmallVector<unsigned, 8> Ops; + if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) + return false; + + MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI); + if (!FoldMI) + return false; + DEBUG(dbgs() << " folded: " << *FoldMI); + LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI); + UseMI->eraseFromParent(); + DefMI->addRegisterDead(LI->reg, 0); + Dead.push_back(DefMI); + return true; +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + LiveIntervals &LIS, VirtRegMap &VRM, + const TargetInstrInfo &TII) { + SetVector<LiveInterval*, + SmallVector<LiveInterval*, 8>, + SmallPtrSet<LiveInterval*, 8> > ToShrink; + MachineRegisterInfo &MRI = VRM.getRegInfo(); + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) { + MachineInstr *MI = Dead.pop_back_val(); + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + continue; + } + + // Use the same criteria as DeadMachineInstructionElim. 
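foldAsLoad above starts with a screening pass over the register's operand list, giving up unless exactly one instruction defines the register and exactly one reads it. That screen on its own, with integer instruction ids standing in for MachineInstr pointers (a sketch, not the LLVM API):

    #include <utility>
    #include <vector>

    struct OperandRec { bool isDef; int instr; };

    // Return the (def, use) instruction pair if the register has exactly one
    // defining and one using instruction, else (-1, -1).
    std::pair<int, int> singleDefUse(const std::vector<OperandRec> &Ops) {
      int Def = -1, Use = -1;
      for (unsigned i = 0; i != Ops.size(); ++i) {
        int MI = Ops[i].instr;
        if (Ops[i].isDef) {
          if (Def != -1 && Def != MI) return std::make_pair(-1, -1);
          Def = MI;
        } else {
          if (Use != -1 && Use != MI) return std::make_pair(-1, -1);
          Use = MI;
        }
      }
      if (Def == -1 || Use == -1) return std::make_pair(-1, -1);
      return std::make_pair(Def, Use);
    }

The real loop also rejects subregister uses and defs whose instruction cannot fold a load, as the checks above show.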
+ bool SawStore = false; + if (!MI->isSafeToMove(&TII, 0, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + continue; + } + + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. + if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.killedAt(Idx))) + ToShrink.insert(&LI); + + // Remove defined value. + if (MOI->isDef()) { + if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { + if (delegate_) + delegate_->LRE_WillShrinkVirtReg(LI.reg); + LI.removeValNo(VNI); + if (LI.empty()) { + ToShrink.remove(&LI); + eraseVirtReg(Reg, LIS); + } + } + } + } + + if (delegate_) + delegate_->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + + if (ToShrink.empty()) + break; + + // Shrink just one live interval. Then delete new dead defs. + LiveInterval *LI = ToShrink.back(); + ToShrink.pop_back(); + if (foldAsLoad(LI, Dead, MRI, LIS, TII)) + continue; + if (delegate_) + delegate_->LRE_WillShrinkVirtReg(LI->reg); + if (!LIS.shrinkToUses(LI, &Dead)) + continue; + + // LI may have been separated, create new intervals. + LI->RenumberValues(LIS); + ConnectedVNInfoEqClasses ConEQ(LIS); + unsigned NumComp = ConEQ.Classify(LI); + if (NumComp <= 1) + continue; + DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); + SmallVector<LiveInterval*, 8> Dups(1, LI); + for (unsigned i = 1; i != NumComp; ++i) { + Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + if (delegate_) + delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + } + ConEQ.Distribute(&Dups[0], MRI); + } } +void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + LiveIntervals &LIS, + const MachineLoopInfo &Loops) { + VirtRegAuxInfo VRAI(MF, LIS, Loops); + for (iterator I = begin(), E = end(); I != E; ++I) { + LiveInterval &LI = **I; + VRAI.CalculateRegClass(LI.reg); + VRAI.CalculateWeightAndHint(LI); + } +} diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h index 73f69ed..14d227e 100644 --- a/lib/CodeGen/LiveRangeEdit.h +++ b/lib/CodeGen/LiveRangeEdit.h @@ -25,13 +25,36 @@ namespace llvm { class AliasAnalysis; class LiveIntervals; +class MachineLoopInfo; class MachineRegisterInfo; class VirtRegMap; class LiveRangeEdit { +public: + /// Callback methods for LiveRangeEdit owners. + struct Delegate { + /// Called immediately before erasing a dead machine instruction. + virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} + + /// Called when a virtual register is no longer used. Return false to defer + /// its deletion from LiveIntervals. + virtual bool LRE_CanEraseVirtReg(unsigned) { return true; } + + /// Called before shrinking the live range of a virtual register. + virtual void LRE_WillShrinkVirtReg(unsigned) {} + + /// Called after cloning a virtual register. + /// This is used for new registers representing connected components of Old. 
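eliminateDeadDefs above is a fixpoint: deleting one dead instruction can remove the last reader of another register, whose defining instruction then becomes dead in turn. The core cascade modeled with use counts; each integer id names a register together with its single defining instruction, SSA-style, and side-effect checks such as isSafeToMove are omitted:

    #include <map>
    #include <vector>

    // Delete everything on Dead; each deletion decrements the use counts of
    // the ids it read, and anything that drops to zero readers dies too.
    int eliminateDead(std::map<int, std::vector<int> > &Reads,
                      std::map<int, int> &UseCount,
                      std::vector<int> &Dead) {
      int Deleted = 0;
      while (!Dead.empty()) {
        int MI = Dead.back();
        Dead.pop_back();
        ++Deleted;
        std::vector<int> &Ops = Reads[MI];
        for (unsigned i = 0; i != Ops.size(); ++i)
          if (--UseCount[Ops[i]] == 0)
            Dead.push_back(Ops[i]);  // its defining instruction is dead now
      }
      return Deleted;
    }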
+ virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} + + virtual ~Delegate() {} + }; + +private: LiveInterval &parent_; SmallVectorImpl<LiveInterval*> &newRegs_; - const SmallVectorImpl<LiveInterval*> &uselessRegs_; + Delegate *const delegate_; + const SmallVectorImpl<LiveInterval*> *uselessRegs_; /// firstNew_ - Index of the first register added to newRegs_. const unsigned firstNew_; @@ -41,11 +64,11 @@ class LiveRangeEdit { /// remattable_ - Values defined by remattable instructions as identified by /// tii.isTriviallyReMaterializable(). - SmallPtrSet<VNInfo*,4> remattable_; + SmallPtrSet<const VNInfo*,4> remattable_; /// rematted_ - Values that were actually rematted, and so need to have their /// live range trimmed or entirely removed. - SmallPtrSet<VNInfo*,4> rematted_; + SmallPtrSet<const VNInfo*,4> rematted_; /// scanRemattable - Identify the parent_ values that may rematerialize. void scanRemattable(LiveIntervals &lis, @@ -57,6 +80,11 @@ class LiveRangeEdit { bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, SlotIndex UseIdx, LiveIntervals &lis); + /// foldAsLoad - If LI has a single use and a single def that can be folded as + /// a load, eliminate the register by folding the def into the use. + bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead, + MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&); + public: /// Create a LiveRangeEdit for breaking down parent into smaller pieces. /// @param parent The register being spilled or split. @@ -66,9 +94,13 @@ public: /// rematerializing values because they are about to be removed. LiveRangeEdit(LiveInterval &parent, SmallVectorImpl<LiveInterval*> &newRegs, - const SmallVectorImpl<LiveInterval*> &uselessRegs) - : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs), - firstNew_(newRegs.size()), scannedRemattable_(false) {} + Delegate *delegate = 0, + const SmallVectorImpl<LiveInterval*> *uselessRegs = 0) + : parent_(parent), newRegs_(newRegs), + delegate_(delegate), + uselessRegs_(uselessRegs), + firstNew_(newRegs.size()), + scannedRemattable_(false) {} LiveInterval &getParent() const { return parent_; } unsigned getReg() const { return parent_.reg; } @@ -81,16 +113,33 @@ public: bool empty() const { return size() == 0; } LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } - /// create - Create a new register with the same class and stack slot as + /// FIXME: Temporary accessors until we can get rid of + /// LiveIntervals::AddIntervalsForSpills + SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; } + const SmallVectorImpl<LiveInterval*> *getUselessVRegs() { + return uselessRegs_; + } + + /// createFrom - Create a new virtual register based on OldReg. + LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&); + + /// create - Create a new register with the same class and original slot as /// parent. - LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&); + LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) { + return createFrom(getReg(), LIS, VRM); + } /// anyRematerializable - Return true if any parent values may be /// rematerializable. - /// This function must be called before ny rematerialization is attempted. + /// This function must be called before any rematerialization is attempted. 
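A consumer of this interface subclasses Delegate and overrides only the hooks it needs. The hook signatures below are the ones declared above, but the subclass itself is a hypothetical client sketch, not code from this tree:

    #include "LiveRangeEdit.h"
    using namespace llvm;

    // Sketch of an observer keeping external bookkeeping consistent while
    // LiveRangeEdit erases, shrinks, and clones registers.
    struct TrackingDelegate : LiveRangeEdit::Delegate {
      virtual void LRE_WillEraseInstruction(MachineInstr *MI) {
        // Drop any cached pointers to MI before it is deleted.
      }
      virtual bool LRE_CanEraseVirtReg(unsigned Reg) {
        return true;   // allow the interval to leave LiveIntervals right away
      }
      virtual void LRE_WillShrinkVirtReg(unsigned Reg) {
        // Invalidate anything cached about Reg's live range.
      }
      virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
        // Record that New is a connected component split out of Old.
      }
    };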
bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&, AliasAnalysis*); + /// checkRematerializable - Manually add VNI to the list of rematerializable + /// values if DefMI may be rematerializable. + bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, + const TargetInstrInfo&, AliasAnalysis*); + /// Remat - Information needed to rematerialize at a specific location. struct Remat { VNInfo *ParentVNI; // parent_'s value at the remat location. @@ -116,18 +165,35 @@ public: const Remat &RM, LiveIntervals&, const TargetInstrInfo&, - const TargetRegisterInfo&); + const TargetRegisterInfo&, + bool Late = false); /// markRematerialized - explicitly mark a value as rematerialized after doing /// it manually. - void markRematerialized(VNInfo *ParentVNI) { + void markRematerialized(const VNInfo *ParentVNI) { rematted_.insert(ParentVNI); } /// didRematerialize - Return true if ParentVNI was rematerialized anywhere. - bool didRematerialize(VNInfo *ParentVNI) const { + bool didRematerialize(const VNInfo *ParentVNI) const { return rematted_.count(ParentVNI); } + + /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try + /// to erase it from LIS. + void eraseVirtReg(unsigned Reg, LiveIntervals &LIS); + + /// eliminateDeadDefs - Try to delete machine instructions that are now dead + /// (allDefsAreDead returns true). This may cause live intervals to be trimmed + /// and further dead defs to be eliminated. + void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + LiveIntervals&, VirtRegMap&, + const TargetInstrInfo&); + + /// calculateRegClassAndHint - Recompute register class and hint for each new + /// register. + void calculateRegClassAndHint(MachineFunction&, LiveIntervals&, + const MachineLoopInfo&); }; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index dd43ef2..20bad60 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -107,9 +107,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, // Mark the variable known alive in this bb VRInfo.AliveBlocks.set(BBNum); - for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(), - E = MBB->pred_rend(); PI != E; ++PI) - WorkList.push_back(*PI); + WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); } void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, @@ -707,7 +705,7 @@ bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) { // Loop over all of the successors of the basic block, checking to see if // the value is either live in the block, or if it is killed in the block.
- std::vector<MachineBasicBlock*> OpSuccBlocks; + SmallVector<MachineBasicBlock*, 8> OpSuccBlocks; for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), E = MBB.succ_end(); SI != E; ++SI) { MachineBasicBlock *SuccMBB = *SI; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ccbff0a..57f3e34 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -363,8 +363,7 @@ void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { } void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { - std::vector<MachineBasicBlock *>::iterator I = - std::find(Predecessors.begin(), Predecessors.end(), pred); + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } @@ -402,8 +401,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - std::vector<MachineBasicBlock *>::const_iterator I = - std::find(Successors.begin(), Successors.end(), MBB); + const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); return I != Successors.end(); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 07a7d27..f97ccf6 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -365,6 +365,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (!FoundCSE) { // Look for trivial copy coalescing opportunities. if (PerformTrivialCoalescing(MI, MBB)) { + Changed = true; + // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; @@ -379,10 +381,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (NewMI) { Commuted = true; FoundCSE = VNT.count(NewMI); - if (NewMI != MI) + if (NewMI != MI) { // New instruction. It doesn't need to be kept. NewMI->eraseFromParent(); - else if (!FoundCSE) + Changed = true; + } else if (!FoundCSE) // MI was changed but it didn't help, commute it back! (void)TII->commuteInstruction(MI); } @@ -450,6 +453,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumPhysCSEs; if (Commuted) ++NumCommutes; + Changed = true; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index aa9ea61..71df6f8 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -441,6 +441,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << ")"; } + // Print nontemporal info. + if (MMO.isNonTemporal()) + OS << "(nontemporal)"; + return OS; } @@ -451,7 +455,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// TID NULL and no operands. MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), + : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -470,7 +475,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// implicit operands. It reserves space for the number of operands specified by /// the TargetInstrDesc. 
MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { if (!NoImp) NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); @@ -484,8 +489,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), - Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { if (!NoImp) NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); @@ -499,7 +504,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// that the MachineInstr is created and added to the end of the specified /// basic block. MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); @@ -514,8 +519,8 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), - Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); @@ -528,7 +533,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -538,6 +543,9 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) addOperand(MI.getOperand(i)); NumImplicitOps = MI.NumImplicitOps; + // Copy all the flags. + Flags = MI.Flags; + // Set parent to null. Parent = 0; @@ -1417,6 +1425,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } bool HaveSemi = false; + if (Flags) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + OS << " flags: "; + + if (Flags & FrameSetup) + OS << "FrameSetup"; + } + if (!memoperands_empty()) { if (!HaveSemi) OS << ";"; HaveSemi = true; @@ -1447,13 +1463,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } } + // Print debug location information. 
if (!debugLoc.isUnknown() && MF) { - if (!HaveSemi) OS << ";"; + if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " dbg:"; printDebugLoc(debugLoc, MF, OS); } - OS << "\n"; + OS << '\n'; } bool MachineInstr::addRegisterKilled(unsigned IncomingReg, @@ -1530,13 +1547,8 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, continue; if (Reg == IncomingReg) { - if (!Found) { - if (MO.isDead()) - // The register is already marked dead. - return true; - MO.setIsDead(); - Found = true; - } + MO.setIsDead(); + Found = true; } else if (hasAliases && MO.isDead() && TargetRegisterInfo::isPhysicalRegister(Reg)) { // There exists a super-register that's marked dead. diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 443fc2d..b315702 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -39,7 +39,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; STATISTIC(NumHoisted, @@ -169,6 +168,10 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); + /// HasAnyPHIUse - Return true if the specified register is used by any + /// phi node. + bool HasAnyPHIUse(unsigned Reg) const; + /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and a use in the current loop, return true if the target considers /// it 'high'. @@ -294,7 +297,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { RegLimit.resize(NumRC); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF); + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); } // Get our Loop information... @@ -758,18 +761,25 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasPHIUses - Return true if the specified register has any PHI use. -static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { +/// HasAnyPHIUse - Return true if the specified register is used by any +/// phi node. +bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), UE = MRI->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; if (UseMI->isPHI()) return true; + // Look past copies as well. + if (UseMI->isCopy()) { + unsigned Def = UseMI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Def) && + HasAnyPHIUse(Def)) + return true; + } } return false; } - /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and a use in the current loop, return true if the target considers /// it 'high'. @@ -976,14 +986,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { return false; } - // If result(s) of this instruction is used by PHIs, then don't hoist it. - // The presence of joins makes it difficult for current register allocator - // implementation to perform remat. + // If the result(s) of this instruction are used by PHIs outside of the loop, + // don't hoist it, because that will introduce an extra copy.
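HasAnyPHIUse above chases full-register copies recursively; def dominance in SSA form keeps such copy chains acyclic, so the recursion terminates. The same walk over a toy def-use table, with integers for registers (stand-ins, not MachineRegisterInfo):

    #include <map>
    #include <vector>

    struct UseRec { bool isPHI; bool isCopy; int copyDef; };  // one use of a reg

    // Does Reg reach a PHI, possibly through a chain of full copies?
    bool feedsPHI(int Reg, const std::map<int, std::vector<UseRec> > &Uses) {
      std::map<int, std::vector<UseRec> >::const_iterator It = Uses.find(Reg);
      if (It == Uses.end())
        return false;
      for (unsigned i = 0; i != It->second.size(); ++i) {
        const UseRec &U = It->second[i];
        if (U.isPHI)
          return true;
        if (U.isCopy && feedsPHI(U.copyDef, Uses))   // look past copies too
          return true;
      }
      return false;
    }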
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - if (HasPHIUses(MO.getReg(), MRI)) + if (HasAnyPHIUse(MO.getReg())) return false; } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 8a93a24..916dff7 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -265,8 +265,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (MI->isDebugValue()) continue; - if (PerformTrivialForwardCoalescing(MI, &MBB)) + bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); + if (Joined) { + MadeChange = true; continue; + } if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 7351119..f95f411 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -402,6 +402,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB, Cond)) { + // If the block branches directly to a landing pad successor, pretend that + // the landing pad is a normal block. + LandingPadSuccs.erase(TBB); + LandingPadSuccs.erase(FBB); + // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's // check whether its answers match up with reality. if (!TBB && !FBB) { @@ -602,9 +607,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check Live Variables. if (MI->isDebugValue()) { // Liveness checks are not valid for debug values. - } else if (MO->isUndef()) { - // An <undef> doesn't refer to any register, so just skip it. - } else if (MO->isUse()) { + } else if (MO->isUse() && !MO->isUndef()) { regsLiveInButUnused.erase(Reg); bool isKill = false; @@ -612,13 +615,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { // A two-addr use counts as a kill if use and def are the same. unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) { + if (Reg == DefReg) isKill = true; - // And in that case an explicit kill flag is not allowed. - if (MO->isKill()) - report("Illegal kill flag on two-address instruction operand", - MO, MONum); - } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { report("Two-address instruction operands must be identical", MO, MONum); } @@ -675,8 +674,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); } } - } else { - assert(MO->isDef()); + } else if (MO->isDef()) { // Register defined. // TODO: verify that earlyclobber ops are not used. 
if (MO->isDead()) diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 5f7cf58..af65f13 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -28,12 +28,17 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include <algorithm> -#include <map> using namespace llvm; +static cl::opt<bool> +DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), + cl::Hidden, cl::desc("Disable critical edge splitting " + "during PHI elimination")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -105,10 +110,12 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; // Split critical edges to help the coalescer - if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { - MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + if (!DisableEdgeSplitting) { + if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + } } // Populate VRegPHIUseCount diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 3489db2..315aedd 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -55,6 +55,11 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::setDefault(RegAlloc); } + // This forces linking of the linear scan register allocator, + // so -regalloc=linearscan still works in clang. + if (Ctor == createLinearScanRegisterAllocator) + return createLinearScanRegisterAllocator(); + if (Ctor != createDefaultRegisterAllocator) return Ctor(); @@ -63,6 +68,6 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { case CodeGenOpt::None: return createFastRegisterAllocator(); default: - return createLinearScanRegisterAllocator(); + return createGreedyRegisterAllocator(); } } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 5d7123c..c105bb0 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -30,6 +30,15 @@ // If the "sub" instruction already sets (or could be modified to set) the // same flag that the "cmp" instruction sets and that "bz" uses, then we can // eliminate the "cmp" instruction.
+// +// - Optimize Bitcast pairs: +// +// v1 = bitcast v0 +// v2 = bitcast v1 +// = v2 +// => +// v1 = bitcast v0 +// = v0 // //===----------------------------------------------------------------------===// @@ -57,7 +66,8 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); STATISTIC(NumReuse, "Number of extension results reused"); -STATISTIC(NumEliminated, "Number of compares eliminated"); +STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); +STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); namespace { @@ -85,6 +95,7 @@ namespace { } private: + bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs); @@ -243,12 +254,85 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } +/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that +/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcasts +/// a value across register classes), and the source is defined by another +/// bitcast instruction B, and the register class of the source of B matches +/// the register class of the def of A, then it is legal to replace all uses +/// of the def of A with the source of B. e.g. +/// %vreg0<def> = VMOVSR %vreg1 +/// %vreg3<def> = VMOVRS %vreg0 +/// Replace all uses of vreg3 with vreg1. + +bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, + MachineBasicBlock *MBB) { + unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs; + if (NumDefs != 1) + return false; + + unsigned Def = 0; + unsigned Src = 0; + for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + Def = Reg; + else if (Src) + // Multiple sources? + return false; + else + Src = Reg; + } + + assert(Def && Src && "Malformed bitcast instruction!"); + + MachineInstr *DefMI = MRI->getVRegDef(Src); + if (!DefMI || !DefMI->getDesc().isBitcast()) + return false; + + unsigned SrcDef = 0; + unsigned SrcSrc = 0; + NumDefs = DefMI->getDesc().getNumDefs(); + NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs; + if (NumDefs != 1) + return false; + for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { + const MachineOperand &MO = DefMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + SrcDef = Reg; + else if (SrcSrc) + // Multiple sources? + return false; + else + SrcSrc = Reg; + } + + if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def)) + return false; + + MRI->replaceRegWith(Def, SrcSrc); + MRI->clearKillFlags(SrcSrc); + MI->eraseFromParent(); + ++NumBitcasts; + return true; +} + /// OptimizeCmpInstr - If the instruction is a compare and the previous /// instruction it's comparing against already sets (or could be modified to /// set) the same flag as the compare, then we can remove the comparison and use /// the flag from the previous instruction. bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, - MachineBasicBlock *MBB){ + MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it.
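The bitcast-pair rule becomes mechanical once each bitcast is reduced to a (def, src) pair and each register carries a class id: if A = bitcast B and B = bitcast C, and C's class matches the class of A's def, then A's def is just C again. A toy rendition with integer ids for registers and classes (hypothetical types, not the pass's data structures):

    #include <map>

    struct Bitcast { int def, src; };               // def = bitcast src

    // Set ReplaceWith = C and return true when A's source was itself produced
    // by a bitcast from a register of A's own destination class.
    bool foldBitcastPair(const Bitcast &A,
                         const std::map<int, Bitcast> &BitcastDefs,
                         const std::map<int, int> &RegClass,
                         int &ReplaceWith) {
      std::map<int, Bitcast>::const_iterator B = BitcastDefs.find(A.src);
      if (B == BitcastDefs.end())
        return false;                               // source is not a bitcast
      int C = B->second.src;
      std::map<int, int>::const_iterator CC = RegClass.find(C);
      std::map<int, int>::const_iterator DC = RegClass.find(A.def);
      if (CC == RegClass.end() || DC == RegClass.end() ||
          CC->second != DC->second)
        return false;                               // class mismatch: not a no-op
      ReplaceWith = C;                              // uses of A.def can read C
      return true;
    }

The pass then rewrites every use with replaceRegWith and clears stale kill flags on the surviving register, as the body above does.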
unsigned SrcReg; @@ -259,7 +343,7 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, // Attempt to optimize the comparison instruction. if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { - ++NumEliminated; + ++NumCmps; return true; } @@ -345,7 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI->getDesc().isCompare()) { + const TargetInstrDesc &TID = MI->getDesc(); + + if (TID.isBitcast()) { + if (OptimizeBitcastInstr(MI, MBB)) { + // MI is deleted. + Changed = true; + MII = First ? I->begin() : llvm::next(PMII); + continue; + } + } else if (TID.isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. Changed = true; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 9cd9941..c04d656 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -47,7 +47,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, unsigned OpIdx, - const TargetInstrInfo *tii_, SmallSet<unsigned, 8> &ImpDefRegs) { switch(OpIdx) { case 1: @@ -61,7 +60,6 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, } static bool isUndefCopy(MachineInstr *MI, unsigned Reg, - const TargetInstrInfo *tii_, SmallSet<unsigned, 8> &ImpDefRegs) { if (MI->isCopy()) { MachineOperand &MO0 = MI->getOperand(0); @@ -86,11 +84,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool Changed = false; - const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo(); - const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo(); - MachineRegisterInfo *mri_ = &fn.getRegInfo(); - - LiveVariables *lv_ = &getAnalysis<LiveVariables>(); + TII = fn.getTarget().getInstrInfo(); + TRI = fn.getTarget().getRegisterInfo(); + MRI = &fn.getRegInfo(); + LV = &getAnalysis<LiveVariables>(); SmallSet<unsigned, 8> ImpDefRegs; SmallVector<MachineInstr*, 8> ImpDefMIs; @@ -113,7 +110,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS) + for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } ImpDefMIs.push_back(MI); @@ -125,7 +122,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { if (MO.isKill()) { - LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); + LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } MI->eraseFromParent(); @@ -145,14 +142,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. 
- if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) { + if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) { bool isKill = MO.isKill(); - MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); if (isKill) { ImpDefRegs.erase(Reg); - LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(MI); } ChangedToImpDef = true; @@ -210,8 +207,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // uses. bool Skip = false; SmallVector<MachineInstr*, 4> DeadImpDefs; - for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg), - DE = mri_->def_end(); DI != DE; ++DI) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), + DE = MRI->def_end(); DI != DE; ++DI) { MachineInstr *DeadImpDef = &*DI; if (!DeadImpDef->isImplicitDef()) { Skip = true; @@ -229,8 +226,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { Changed = true; // Process each use instruction once. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), - UE = mri_->use_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { if (UI.getOperand().isUndef()) continue; MachineInstr *RMI = &*UI; @@ -242,8 +239,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *RMI = RUses[i]; // Turn a copy use into an implicit_def. - if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) { - RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); + if (isUndefCopy(RMI, Reg, ImpDefRegs)) { + RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; SmallVector<unsigned, 4> Ops; @@ -263,15 +260,15 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Update LiveVariables varinfo if the instruction is a kill. if (isKill) { - LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(RMI); } continue; } // Replace Reg with a new vreg that's marked implicit. - const TargetRegisterClass* RC = mri_->getRegClass(Reg); - unsigned NewVReg = mri_->createVirtualRegister(RC); + const TargetRegisterClass* RC = MRI->getRegClass(Reg); + unsigned NewVReg = MRI->createVirtualRegister(RC); bool isKill = true; for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { MachineOperand &RRMO = RMI->getOperand(j); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index ad7b6e4..f1f3c99 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -337,7 +337,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { --BeforeI; // Restore all registers immediately before the return and any - // terminators that preceed it. + // terminators that precede it. if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); @@ -437,7 +437,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { --BeforeI; // Restore all registers immediately before the return and any - // terminators that preceed it. + // terminators that precede it. 
for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); @@ -559,7 +559,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { + if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); @@ -641,7 +642,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { + if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || + !RegInfo->useFPForScavengingIndex(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); @@ -811,7 +813,6 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; - bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); @@ -842,10 +843,8 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { } } - if (DoIncr) { - RS->forward(I); - ++I; - } + RS->forward(I); + ++I; } } } diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index b655dda4..7f75f65 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -26,7 +26,7 @@ and then "merge" mul and mov: sxth r3, r3 mla r4, r3, lr, r4 -It also increase the likelyhood the store may become dead. +It also increases the likelihood the store may become dead. //===---------------------------------------------------------------------===// @@ -162,7 +162,7 @@ synthesize the various copy insertion/inspection methods in TargetInstrInfo. //===---------------------------------------------------------------------===// -Stack coloring improvments: +Stack coloring improvements: 1. Do proper LiveStackAnalysis on all stack objects including those which are not spill slots. diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 5af0ce7..f431d5a 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -61,6 +61,11 @@ class LiveVirtRegQueue; /// assignment order. class RegAllocBase { LiveIntervalUnion::Allocator UnionAllocator; + + // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual + // registers may have changed. + unsigned UserTag; + protected: // Array of LiveIntervalUnions indexed by physical register. class LiveUnionArray { @@ -92,7 +97,7 @@ protected: // query on a new live virtual register. OwningArrayPtr<LiveIntervalUnion::Query> Queries; - RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {} + RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} virtual ~RegAllocBase() {} @@ -104,7 +109,7 @@ protected: // before querying a new live virtual register. This ties Queries and // PhysReg2LiveUnion together.
LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { - Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]); + Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]); return Queries[PhysReg]; } diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 6923908..d92d80f 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "LiveDebugVariables.h" #include "LiveIntervalUnion.h" +#include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "RenderMachineFunction.h" #include "Spiller.h" @@ -136,6 +138,7 @@ char RABasic::ID = 0; } // end anonymous namespace RABasic::RABasic(): MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); @@ -154,6 +157,8 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); AU.addRequiredTransitive<RegisterCoalescer>(); @@ -230,9 +235,12 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { MRI = &vrm.getRegInfo(); VRM = &vrm; LIS = &lis; - PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs()); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + const unsigned NumRegs = TRI->getNumRegs(); + if (NumRegs != PhysReg2LiveUnion.numRegs()) { + PhysReg2LiveUnion.init(UnionAllocator, NumRegs); + // Cache an interference query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + } } void RegAllocBase::LiveUnionArray::clear() { @@ -246,13 +254,15 @@ void RegAllocBase::LiveUnionArray::clear() { } void RegAllocBase::releaseMemory() { - PhysReg2LiveUnion.clear(); + for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) + PhysReg2LiveUnion[r].clear(); } // Visit all the live registers. If they are already assigned to a physical // register, unify them with the corresponding LiveIntervalUnion, otherwise push // them on the priority queue for later assignment. void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { unsigned RegNum = I->first; LiveInterval &VirtReg = *I->second; @@ -268,6 +278,7 @@ void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << '\n'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); PhysReg2LiveUnion[PhysReg].unify(VirtReg); ++NumAssigned; } @@ -288,6 +299,18 @@ void RegAllocBase::allocatePhysRegs() { // Continue assigning vregs one at a time to available physical registers. while (LiveInterval *VirtReg = dequeue()) { + assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + + // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) { + DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + LIS->removeInterval(VirtReg->reg); + continue; + } + + // Invalidate all interference queries, live ranges could have changed. + ++UserTag; + // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that // result from splitting. @@ -304,7 +327,12 @@ void RegAllocBase::allocatePhysRegs() { for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { LiveInterval *SplitVirtReg = *I; - if (SplitVirtReg->empty()) continue; + assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + LIS->removeInterval(SplitVirtReg->reg); + continue; + } DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); @@ -344,7 +372,8 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, unassign(SpilledVReg, PhysReg); // Spill the extracted interval. - spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills); + LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills); + spiller().spill(LRE); } // After extracting segments, the query's results are invalid. But keep the // contents valid until we're done accessing pendingSpills. @@ -381,29 +410,31 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // Add newly allocated physical registers to the MBB live in sets. void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - typedef SmallVector<MachineBasicBlock*, 8> MBBVec; - MBBVec liveInMBBs; - MachineBasicBlock &entryMBB = *MF->begin(); + SlotIndexes *Indexes = LIS->getSlotIndexes(); + if (MF->size() <= 1) + return; + LiveIntervalUnion::SegmentIter SI; for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; if (LiveUnion.empty()) continue; - for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid(); - ++SI) { - - // Find the set of basic blocks which this range is live into... - liveInMBBs.clear(); - if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue; - - // And add the physreg for this interval to their live-in sets. - for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end(); - I != E; ++I) { - MachineBasicBlock *MBB = *I; - if (MBB == &entryMBB) continue; - if (MBB->isLiveIn(PhysReg)) continue; - MBB->addLiveIn(PhysReg); - } + MachineFunction::iterator MBB = llvm::next(MF->begin()); + MachineFunction::iterator MFE = MF->end(); + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.setMap(LiveUnion.getMap()); + SI.find(Start); + while (SI.valid()) { + if (SI.start() <= Start) { + if (!MBB->isLiveIn(PhysReg)) + MBB->addLiveIn(PhysReg); + } else if (SI.start() > Stop) + MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); + if (++MBB == MFE) + break; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.advanceTo(Start); } } } @@ -469,9 +500,8 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, } // No other spill candidates were found, so spill the current VirtReg. 
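The UserTag introduced here is a generation counter: rather than clearing every cached interference query when live ranges may have changed, allocatePhysRegs bumps one integer and each Query reinitializes itself lazily when it sees a stale tag. A small sketch of the pattern, with invented names:

#include <cstdio>

// Generation-tagged cache slot: recomputed at most once per generation.
struct TaggedQuery {
  unsigned Tag = 0;
  int Result = 0;

  int get(unsigned CurrentTag, int FreshValue) {
    if (Tag != CurrentTag) { // stale: the world changed since last init
      Result = FreshValue;   // stands in for recomputing interference
      Tag = CurrentTag;
    }
    return Result;
  }
};

int main() {
  unsigned UserTag = 0;
  TaggedQuery Q;
  std::printf("%d\n", Q.get(UserTag, 1)); // computes: prints 1
  std::printf("%d\n", Q.get(UserTag, 2)); // cached:   prints 1
  ++UserTag;                              // live ranges changed
  std::printf("%d\n", Q.get(UserTag, 2)); // recomputes: prints 2
}

Invalidation thus costs a single increment per allocation round instead of a walk over one cached query per physical register.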
DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); - SmallVector<LiveInterval*, 1> pendingSpills; - - spiller().spill(&VirtReg, SplitVRegs, pendingSpills); + LiveRangeEdit LRE(VirtReg, SplitVRegs); + spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. @@ -490,7 +520,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { ReservedRegs = TRI->getReservedRegs(*MF); - SpillerInstance.reset(createSpiller(*this, *MF, *VRM)); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); @@ -525,6 +555,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { // Run rewriter VRM->rewrite(LIS->getSlotIndexes()); + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 15036e3..b2fd6e0 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -97,7 +97,7 @@ namespace { // immediately without checking aliases. regFree, - // A reserved register has been assigned expolicitly (e.g., setting up a + // A reserved register has been assigned explicitly (e.g., setting up a // call parameter), and it remains reserved until it is used. regReserved @@ -396,7 +396,6 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, PhysRegState[PhysReg] = NewState; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { - UsedInInstr.set(Alias); switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -420,20 +419,25 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.test(PhysReg)) + if (UsedInInstr.test(PhysReg)) { + DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n"); return spillImpossible; + } switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; case regFree: return 0; case regReserved: + DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: " + << PhysReg << " is reserved already.\n"); return spillImpossible; default: return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; } - // This is a disabled register, add up const of aliases. + // This is a disabled register, add up cost of aliases. + DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n"); unsigned Cost = 0; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { @@ -511,9 +515,14 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { unsigned BestReg = 0, BestCost = spillImpossible; for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { - if (!Allocatable.test(*I)) + if (!Allocatable.test(*I)) { + DEBUG(dbgs() << "\tRegister " << *I << " is not allocatable.\n"); continue; + } unsigned Cost = calcSpillCost(*I); + DEBUG(dbgs() << "\tRegister: " << *I << "\n"); + DEBUG(dbgs() << "\tCost: " << Cost << "\n"); + DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. 
if (Cost == 0) return assignVirtToPhysReg(LRE, *I); @@ -722,9 +731,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n"); UsedInInstr.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - UsedInInstr.set(*AS); } // Also mark PartialDefs as used to avoid reallocation. diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 406485a..7c461d8 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -14,7 +14,8 @@ #define DEBUG_TYPE "regalloc" #include "AllocationOrder.h" -#include "LiveIntervalUnion.h" +#include "InterferenceCache.h" +#include "LiveDebugVariables.h" #include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "Spiller.h" @@ -49,14 +50,16 @@ using namespace llvm; STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); -STATISTIC(NumReassigned, "Number of interferences reassigned"); STATISTIC(NumEvicted, "Number of interferences evicted"); static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); namespace { -class RAGreedy : public MachineFunctionPass, public RegAllocBase { +class RAGreedy : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { + // context MachineFunction *MF; BitVector ReservedRegs; @@ -72,14 +75,73 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase { // state std::auto_ptr<Spiller> SpillerInstance; - std::auto_ptr<SplitAnalysis> SA; std::priority_queue<std::pair<unsigned, unsigned> > Queue; - IndexedMap<unsigned, VirtReg2IndexFunctor> Generation; + + // Live ranges pass through a number of stages as we try to allocate them. + // Some of the stages may also create new live ranges: + // + // - Region splitting. + // - Per-block splitting. + // - Local splitting. + // - Spilling. + // + // Ranges produced by one of the stages skip the previous stages when they are + // dequeued. This improves performance because we can skip interference checks + // that are unlikely to give any results. It also guarantees that the live + // range splitting algorithm terminates, something that is otherwise hard to + // ensure. + enum LiveRangeStage { + RS_New, ///< Never seen before. + RS_First, ///< First time in the queue. + RS_Second, ///< Second time in the queue. + RS_Global, ///< Produced by global splitting. + RS_Local, ///< Produced by local splitting. + RS_Spill ///< Produced by spilling. + }; + + IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage; + + LiveRangeStage getStage(const LiveInterval &VirtReg) const { + return LiveRangeStage(LRStage[VirtReg.reg]); + } + + template<typename Iterator> + void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { + LRStage.resize(MRI->getNumVirtRegs()); + for (;Begin != End; ++Begin) { + unsigned Reg = (*Begin)->reg; + if (LRStage[Reg] == RS_New) + LRStage[Reg] = NewStage; + } + } // splitting state. + std::auto_ptr<SplitAnalysis> SA; + std::auto_ptr<SplitEditor> SE; - /// All basic blocks where the current register is live. - SmallVector<SpillPlacement::BlockConstraint, 8> SpillConstraints; + /// Cached per-block interference maps + InterferenceCache IntfCache; + + /// All basic blocks where the current register has uses. 
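The staging scheme described in the comment block above is what guarantees termination: a register's stage only moves forward, so a range is never re-split by a stage it has already been through. A tiny sketch of that invariant (the enum mirrors the one above; the strict ladder is a condensed illustration, not the pass's exact dispatch):

#include <cstdio>

enum LiveRangeStage { RS_New, RS_First, RS_Second, RS_Global, RS_Local, RS_Spill };

// Stages only advance. New ranges produced at some stage re-enter the
// queue at that stage, so earlier, already-tried treatments are skipped.
static LiveRangeStage advance(LiveRangeStage S) {
  return S == RS_Spill ? RS_Spill : LiveRangeStage(S + 1);
}

int main() {
  LiveRangeStage S = RS_New;
  while (S != RS_Spill)
    S = advance(S);
  std::printf("terminated at stage %d\n", S); // always reaches RS_Spill
}

Since every requeued range is strictly further along this ladder, the allocator cannot loop on the products of its own splitting.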
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; + + /// Global live range splitting candidate info. + struct GlobalSplitCandidate { + unsigned PhysReg; + BitVector LiveBundles; + SmallVector<unsigned, 8> ActiveBlocks; + + void reset(unsigned Reg) { + PhysReg = Reg; + LiveBundles.clear(); + ActiveBlocks.clear(); + } + }; + + /// Candidate info for each PhysReg in AllocationOrder. + /// This vector never shrinks, but grows to the size of the largest register + /// class. + SmallVector<GlobalSplitCandidate, 32> GlobalCand; /// For every instruction in SA->UseSlots, store the previous non-copy /// instruction. @@ -108,42 +170,50 @@ public: static char ID; private: - bool checkUncachedInterference(LiveInterval&, unsigned); - LiveInterval *getSingleInterference(LiveInterval&, unsigned); - bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg); - float calcInterferenceWeight(LiveInterval&, unsigned); - float calcInterferenceInfo(LiveInterval&, unsigned); - float calcGlobalSplitCost(const BitVector&); - void splitAroundRegion(LiveInterval&, unsigned, const BitVector&, + void LRE_WillEraseInstruction(MachineInstr*); + bool LRE_CanEraseVirtReg(unsigned); + void LRE_WillShrinkVirtReg(unsigned); + void LRE_DidCloneVirtReg(unsigned, unsigned); + + float calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, float&); + void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); + void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor); + float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor); + void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&, SmallVectorImpl<LiveInterval*>&); void calcGapWeights(unsigned, SmallVectorImpl<float>&); SlotIndex getPrevMappedIndex(const MachineInstr*); void calcPrevSlots(); unsigned nextSplitPoint(unsigned); - bool canEvictInterference(LiveInterval&, unsigned, unsigned, float&); + bool canEvictInterference(LiveInterval&, unsigned, float&); - unsigned tryReassign(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + unsigned tryAssign(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<LiveInterval*>&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); unsigned trySplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); - unsigned trySpillInterferences(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); }; } // end anonymous namespace char RAGreedy::ID = 0; +// Hysteresis to use when comparing floats. +// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = 0.98f; + + FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -RAGreedy::RAGreedy(): MachineFunctionPass(ID) { +RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); @@ -166,6 +236,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LiveIntervals>(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); AU.addRequiredTransitive<RegisterCoalescer>(); @@ -185,9 +257,49 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } + +//===----------------------------------------------------------------------===// +// LiveRangeEdit delegate methods +//===----------------------------------------------------------------------===// + +void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { + // LRE itself will remove from SlotIndexes and parent basic block. + VRM->RemoveMachineInstrFromMaps(MI); +} + +bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { + if (unsigned PhysReg = VRM->getPhys(VirtReg)) { + unassign(LIS->getInterval(VirtReg), PhysReg); + return true; + } + // Unassigned virtreg is probably in the priority queue. + // RegAllocBase will erase it after dequeueing. + return false; +} + +void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { + unsigned PhysReg = VRM->getPhys(VirtReg); + if (!PhysReg) + return; + + // Register is assigned, put it back on the queue for reassignment. + LiveInterval &LI = LIS->getInterval(VirtReg); + unassign(LI, PhysReg); + enqueue(&LI); +} + +void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { + // LRE may clone a virtual register because dead code elimination causes it to + // be split into connected components. Ensure that the new register gets the + // same stage as the parent. + LRStage.grow(New); + LRStage[New] = LRStage[Old]; +} + void RAGreedy::releaseMemory() { SpillerInstance.reset(0); - Generation.clear(); + LRStage.clear(); + GlobalCand.clear(); RegAllocBase::releaseMemory(); } @@ -198,20 +310,26 @@ void RAGreedy::enqueue(LiveInterval *LI) { const unsigned Reg = LI->reg; assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only enqueue virtual registers"); - const unsigned Hint = VRM->getRegAllocPref(Reg); unsigned Prio; - Generation.grow(Reg); - if (++Generation[Reg] == 1) - // 1st generation ranges are handled first, long -> short. + LRStage.grow(Reg); + if (LRStage[Reg] == RS_New) + LRStage[Reg] = RS_First; + + if (LRStage[Reg] == RS_Second) + // Unsplit ranges that couldn't be allocated immediately are deferred until + // everything else has been allocated. Long ranges are allocated last so + // they are split against realistic interference. + Prio = (1u << 31) - Size; + else { + // Everything else is allocated in long->short order. Long ranges that don't + // fit should be spilled ASAP so they don't create interference. Prio = (1u << 31) + Size; - else - // Repeat offenders are handled second, short -> long - Prio = (1u << 30) - Size; - // Boost ranges that have a physical register hint. 
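The Hysteresis constant just defined encodes a simple rule for comparing floating point costs: a challenger must beat the incumbent by a clear margin before the allocator changes its mind, so near-ties do not flip decisions back and forth between rounds. A minimal sketch of the idiom (the helper name is invented):

#include <cstdio>

// A new candidate must be at least ~2% cheaper before we switch;
// anything inside that band keeps the previous decision stable.
static bool definitelyCheaper(float NewCost, float OldCost) {
  const float Hysteresis = 0.98f;
  return NewCost < Hysteresis * OldCost;
}

int main() {
  std::printf("%d\n", definitelyCheaper(97.0f, 100.0f)); // 1: clearly better
  std::printf("%d\n", definitelyCheaper(99.0f, 100.0f)); // 0: within the noise
}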
- if (TargetRegisterInfo::isPhysicalRegister(Hint)) - Prio |= (1u << 30); + // Boost ranges that have a physical register hint. + if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg))) + Prio |= (1u << 30); + } Queue.push(std::make_pair(Prio, Reg)); } @@ -224,97 +342,34 @@ LiveInterval *RAGreedy::dequeue() { return LI; } + //===----------------------------------------------------------------------===// -// Register Reassignment +// Direct Assignment //===----------------------------------------------------------------------===// -// Check interference without using the cache. -bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]); - if (subQ.checkInterference()) - return true; - } - return false; -} - -/// getSingleInterference - Return the single interfering virtual register -/// assigned to PhysReg. Return 0 if more than one virtual register is -/// interfering. -LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - // Check physreg and aliases. - LiveInterval *Interference = 0; - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - if (Q.checkInterference()) { - if (Interference) - return 0; - if (Q.collectInterferingVRegs(2) > 1) - return 0; - Interference = Q.interferingVRegs().front(); - } - } - return Interference; -} - -// Attempt to reassign this virtual register to a different physical register. -// -// FIXME: we are not yet caching these "second-level" interferences discovered -// in the sub-queries. These interferences can change with each call to -// selectOrSplit. However, we could implement a "may-interfere" cache that -// could be conservatively dirtied when we reassign or split. -// -// FIXME: This may result in a lot of alias queries. We could summarize alias -// live intervals in their parent register's live union, but it's messy. -bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg, - unsigned WantedPhysReg) { - assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) && - "Can only reassign virtual registers"); - assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) && - "inconsistent phys reg assigment"); - - AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - // Don't reassign to a WantedPhysReg alias. - if (TRI->regsOverlap(PhysReg, WantedPhysReg)) - continue; - - if (checkUncachedInterference(InterferingVReg, PhysReg)) - continue; +/// tryAssign - Try to assign VirtReg to an available register. +unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + Order.rewind(); + unsigned PhysReg; + while ((PhysReg = Order.next())) + if (!checkPhysRegInterference(VirtReg, PhysReg)) + break; + if (!PhysReg || Order.isHint(PhysReg)) + return PhysReg; - // Reassign the interfering virtual reg to this physical reg. - unsigned OldAssign = VRM->getPhys(InterferingVReg.reg); - DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " << - TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n'); - unassign(InterferingVReg, OldAssign); - assign(InterferingVReg, PhysReg); - ++NumReassigned; - return true; - } - return false; -} + // PhysReg is available. 
Try to evict interference from a cheaper alternative. + unsigned Cost = TRI->getCostPerUse(PhysReg); -/// tryReassign - Try to reassign a single interference to a different physreg. -/// @param VirtReg Currently unassigned virtual register. -/// @param Order Physregs to try. -/// @return Physreg to assign VirtReg, or 0. -unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs){ - NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled); + // Most registers have 0 additional cost. + if (!Cost) + return PhysReg; - Order.rewind(); - while (unsigned PhysReg = Order.next()) { - LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg); - if (!InterferingVReg) - continue; - if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg)) - continue; - if (reassignVReg(*InterferingVReg, PhysReg)) - return PhysReg; - } - return 0; + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost + << '\n'); + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); + return CheapReg ? CheapReg : PhysReg; } @@ -323,22 +378,24 @@ unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order, //===----------------------------------------------------------------------===// /// canEvict - Return true if all interferences between VirtReg and PhysReg can -/// be evicted. Set maxWeight to the maximal spill weight of an interference. +/// be evicted. +/// Return false if any interference is heavier than MaxWeight. +/// On return, set MaxWeight to the maximal spill weight of an interference. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, - unsigned Size, float &MaxWeight) { + float &MaxWeight) { float Weight = 0; for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - // If there is 10 or more interferences, chances are one is smaller. - if (Q.collectInterferingVRegs(10) >= 10) + // If there are 10 or more interferences, chances are one is heavier. + if (Q.collectInterferingVRegs(10, MaxWeight) >= 10) return false; - // CHeck if any interfering live range is shorter than VirtReg. - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; + // Check if any interfering live range is heavier than MaxWeight. + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) return false; - if (Intf->getSize() <= Size) + if (Intf->weight >= MaxWeight) return false; Weight = std::max(Weight, Intf->weight); } @@ -353,25 +410,28 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// @return Physreg to assign VirtReg, or 0. unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs){ + SmallVectorImpl<LiveInterval*> &NewVRegs, + unsigned CostPerUseLimit) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); - // We can only evict interference if all interfering registers are virtual and - // longer than VirtReg. - const unsigned Size = VirtReg.getSize(); - // Keep track of the lightest single interference seen so far.
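canEvictInterference above replaces the old size-based test with a weight-based one: eviction is legal only when every interfering range is virtual and lighter than the current weight budget, and the heaviest survivor is reported back. A rough standalone model (Range is an invented stand-in for a live interval):

#include <algorithm>
#include <vector>

struct Range { float Weight; bool IsPhys; };

// Returns true if all interferences can be evicted; on success MaxWeight
// is lowered to the heaviest range that would actually be evicted.
static bool canEvict(const std::vector<Range> &Intfs, float &MaxWeight) {
  float Heaviest = 0;
  for (const Range &R : Intfs) {
    if (R.IsPhys)              // fixed interference can never be evicted
      return false;
    if (R.Weight >= MaxWeight) // heavier than our budget: give up
      return false;
    Heaviest = std::max(Heaviest, R.Weight);
  }
  MaxWeight = Heaviest;
  return true;
}

int main() {
  std::vector<Range> Intfs = {{1.0f, false}, {2.5f, false}};
  float Budget = 3.0f;
  return canEvict(Intfs, Budget) ? 0 : 1; // succeeds; Budget becomes 2.5
}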
- float BestWeight = 0; + float BestWeight = VirtReg.weight; unsigned BestPhys = 0; Order.rewind(); while (unsigned PhysReg = Order.next()) { - float Weight = 0; - if (!canEvictInterference(VirtReg, PhysReg, Size, Weight)) + if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + continue; + // The first use of a register in a function has cost 1. + if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg)) + continue; + + float Weight = BestWeight; + if (!canEvictInterference(VirtReg, PhysReg, Weight)) continue; // This is an eviction candidate. - DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = " + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = " << Weight << '\n'); if (BestPhys && Weight >= BestWeight) continue; @@ -406,201 +466,228 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // Region Splitting //===----------------------------------------------------------------------===// -/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints -/// when considering interference from PhysReg. Also compute an optimistic local -/// cost of this interference pattern. -/// -/// The final cost of a split is the local cost + global cost of preferences -/// broken by SpillPlacement. -/// -float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { +/// addSplitConstraints - Fill out the SplitConstraints vector based on the +/// interference pattern in Physreg and its aliases. Add the constraints to +/// SpillPlacement and return the static cost of this split in Cost, assuming +/// that all preferences in SplitConstraints are met. +/// Return false if there are no bundles with positive bias. +bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, + float &Cost) { + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + // Reset interference dependent info. - SpillConstraints.resize(SA->LiveBlocks.size()); - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + SplitConstraints.resize(UseBlocks.size()); + float StaticCost = 0; + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + BC.Number = BI.MBB->getNumber(); - BC.Entry = (BI.Uses && BI.LiveIn) ? - SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.Exit = (BI.Uses && BI.LiveOut) ? - SpillPlacement::PrefReg : SpillPlacement::DontCare; - BI.OverlapEntry = BI.OverlapExit = false; - } + Intf.moveToBlock(BC.Number); + BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; - // Add interference info from each PhysReg alias. - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(VirtReg, *AI).checkInterference()) + if (!Intf.hasInterference()) continue; - LiveIntervalUnion::SegmentIter IntI = - PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); - if (!IntI.valid()) - continue; - - // Determine which blocks have interference live in or after the last split - // point. - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - // Skip interference-free blocks. 
- if (IntI.start() >= Stop) - continue; - - // Is the interference live-in? - if (BI.LiveIn) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() <= Start) - BC.Entry = SpillPlacement::MustSpill; - } + // Number of spill code instructions to insert. + unsigned Ins = 0; + + // Interference for the live-in value. + if (BI.LiveIn) { + if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) + BC.Entry = SpillPlacement::MustSpill, ++Ins; + else if (Intf.first() < BI.FirstUse) + BC.Entry = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) + ++Ins; + } - // Is the interference overlapping the last split point? - if (BI.LiveOut) { - if (IntI.stop() < BI.LastSplitPoint) - IntI.advanceTo(BI.LastSplitPoint.getPrevSlot()); - if (!IntI.valid()) - break; - if (IntI.start() < Stop) - BC.Exit = SpillPlacement::MustSpill; - } + // Interference for the live-out value. + if (BI.LiveOut) { + if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) + BC.Exit = SpillPlacement::MustSpill, ++Ins; + else if (Intf.last() > BI.LastUse) + BC.Exit = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def)) + ++Ins; } - // Rewind iterator and check other interferences. - IntI.find(VirtReg.beginIndex()); - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + // Accumulate the total frequency of inserted spill code. + if (Ins) + StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + } + Cost = StaticCost; - // Skip interference-free blocks. - if (IntI.start() >= Stop) - continue; + // Add constraints for use-blocks. Note that these are the only constraints + // that may add a positive bias, it is downhill from here. + SpillPlacer->addConstraints(SplitConstraints); + return SpillPlacer->scanActiveBundles(); +} - // Handle transparent blocks with interference separately. - // Transparent blocks never incur any fixed cost. - if (BI.LiveThrough && !BI.Uses) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() >= Stop) - continue; - if (BC.Entry != SpillPlacement::MustSpill) - BC.Entry = SpillPlacement::PrefSpill; - if (BC.Exit != SpillPlacement::MustSpill) - BC.Exit = SpillPlacement::PrefSpill; - continue; +/// addThroughConstraints - Add constraints and links to SpillPlacer from the +/// live-through blocks in Blocks. +void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, + ArrayRef<unsigned> Blocks) { + const unsigned GroupSize = 8; + SpillPlacement::BlockConstraint BCS[GroupSize]; + unsigned TBS[GroupSize]; + unsigned B = 0, T = 0; + + for (unsigned i = 0; i != Blocks.size(); ++i) { + unsigned Number = Blocks[i]; + Intf.moveToBlock(Number); + + if (!Intf.hasInterference()) { + assert(T < GroupSize && "Array overflow"); + TBS[T] = Number; + if (++T == GroupSize) { + SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); + T = 0; } + continue; + } - // Now we only have blocks with uses left. - // Check if the interference overlaps the uses. - assert(BI.Uses && "Non-transparent block without any uses"); - - // Check interference on entry. - if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - // Not live in, but before the first use. 
- if (IntI.start() < BI.FirstUse) { - BC.Entry = SpillPlacement::PrefSpill; - // If the block contains a kill from an earlier split, never split - // again in the same block. - if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Kill)) - BC.Entry = SpillPlacement::MustSpill; - } - } + assert(B < GroupSize && "Array overflow"); + BCS[B].Number = Number; + + // Interference for the live-in value. + if (Intf.first() <= Indexes->getMBBStartIdx(Number)) + BCS[B].Entry = SpillPlacement::MustSpill; + else + BCS[B].Entry = SpillPlacement::PrefSpill; + + // Interference for the live-out value. + if (Intf.last() >= SA->getLastSplitPoint(Number)) + BCS[B].Exit = SpillPlacement::MustSpill; + else + BCS[B].Exit = SpillPlacement::PrefSpill; + + if (++B == GroupSize) { + ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); + SpillPlacer->addConstraints(Array); + B = 0; + } + } - // Does interference overlap the uses in the entry segment - // [FirstUse;Kill)? - if (BI.LiveIn && !BI.OverlapEntry) { - IntI.advanceTo(BI.FirstUse); - if (!IntI.valid()) - break; - // A live-through interval has no kill. - // Check [FirstUse;LastUse) instead. - if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) - BI.OverlapEntry = true; - } + ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); + SpillPlacer->addConstraints(Array); + SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); +} - // Does interference overlap the uses in the exit segment [Def;LastUse)? - if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) { - IntI.advanceTo(BI.Def); - if (!IntI.valid()) - break; - if (IntI.start() < BI.LastUse) - BI.OverlapExit = true; - } +void RAGreedy::growRegion(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; +#ifndef NDEBUG + unsigned Visited = 0; +#endif - // Check interference on exit. - if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) { - // Check interference between LastUse and Stop. - if (BC.Exit != SpillPlacement::PrefSpill) { - IntI.advanceTo(BI.LastUse); - if (!IntI.valid()) - break; - if (IntI.start() < Stop) { - BC.Exit = SpillPlacement::PrefSpill; - // Avoid splitting twice in the same block. - if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Def)) - BC.Exit = SpillPlacement::MustSpill; - } - } + for (;;) { + ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); + if (NewBundles.empty()) + break; + // Find new through blocks in the periphery of PrefRegBundles. + for (int i = 0, e = NewBundles.size(); i != e; ++i) { + unsigned Bundle = NewBundles[i]; + // Look at all blocks connected to Bundle in the full graph. + ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + unsigned Block = *I; + if (!Todo.test(Block)) + continue; + Todo.reset(Block); + // This is a new through block. Add it to SpillPlacer later. + ActiveBlocks.push_back(Block); +#ifndef NDEBUG + ++Visited; +#endif } } + // Any new blocks to add? + if (ActiveBlocks.size() > AddedTo) { + ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo], + ActiveBlocks.size() - AddedTo); + addThroughConstraints(Intf, Add); + AddedTo = ActiveBlocks.size(); + } + // Perhaps iterating can enable more bundles? + SpillPlacer->iterate(); } + DEBUG(dbgs() << ", v=" << Visited); +} - // Accumulate a local cost of this interference pattern. 
- float LocalCost = 0; - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - if (!BI.Uses) - continue; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - unsigned Inserts = 0; - - // Do we need spill code for the entry segment? - if (BI.LiveIn) - Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg; - - // For the exit segment? - if (BI.LiveOut) - Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg; - - // The local cost of spill code in this block is the block frequency times - // the number of spill instructions inserted. - if (Inserts) - LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB); +/// calcSpillCost - Compute how expensive it would be to split the live range in +/// SA around all use blocks instead of forming bundle regions. +float RAGreedy::calcSpillCost() { + float Cost = 0; + const LiveInterval &LI = SA->getParent(); + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + // We normally only need one spill instruction - a load or a store. + Cost += SpillPlacer->getBlockFrequency(Number); + + // Unless the value is redefined in the block. + if (BI.LiveIn && BI.LiveOut) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(Number); + LiveInterval::const_iterator I = LI.find(Start); + assert(I != LI.end() && "Expected live-in value"); + // Is there a different live-out value? If so, we need an extra spill + // instruction. + if (I->end < Stop) + Cost += SpillPlacer->getBlockFrequency(Number); + } } - DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = " - << LocalCost << '\n'); - return LocalCost; + return Cost; } /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the -/// interference pattern in SpillConstraints. +/// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { float GlobalCost = 0; - for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) { - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - unsigned Inserts = 0; - // Broken entry preference? - Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] != - (BC.Entry == SpillPlacement::PrefReg); - // Broken exit preference? 
- Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] != - (BC.Exit == SpillPlacement::PrefReg); - if (Inserts) - GlobalCost += - Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB); + const BitVector &LiveBundles = Cand.LiveBundles; + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; + unsigned Ins = 0; + + if (BI.LiveIn) + Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); + if (BI.LiveOut) + Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); + if (Ins) + GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + } + + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + if (!RegIn && !RegOut) + continue; + if (RegIn && RegOut) { + // We need double spill code if this block has interference. + Intf.moveToBlock(Number); + if (Intf.hasInterference()) + GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); + continue; + } + // live-in / stack-out or stack-in live-out. + GlobalCost += SpillPlacer->getBlockFrequency(Number); } - DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n'); return GlobalCost; } @@ -611,113 +698,74 @@ float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) { /// avoiding interference. The 'stack' interval is the complement constructed by /// SplitEditor. It will contain the rest. /// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, - const BitVector &LiveBundles, +void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, + GlobalSplitCandidate &Cand, SmallVectorImpl<LiveInterval*> &NewVRegs) { + const BitVector &LiveBundles = Cand.LiveBundles; + DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) + dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI) << " with bundles"; for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - // First compute interference ranges in the live blocks. - typedef std::pair<SlotIndex, SlotIndex> IndexPair; - SmallVector<IndexPair, 8> InterferenceRanges; - InterferenceRanges.resize(SA->LiveBlocks.size()); - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(VirtReg, *AI).checkInterference()) - continue; - LiveIntervalUnion::SegmentIter IntI = - PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); - if (!IntI.valid()) - continue; - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - IndexPair &IP = InterferenceRanges[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - // Skip interference-free blocks. - if (IntI.start() >= Stop) - continue; - - // First interference in block. - if (BI.LiveIn) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() >= Stop) - continue; - if (!IP.first.isValid() || IntI.start() < IP.first) - IP.first = IntI.start(); - } - - // Last interference in block. 
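Both cost functions share one accounting unit: an inserted spill instruction weighted by the frequency of the block it lands in. A condensed sketch of the global-cost side (UseBlock is invented; it collapses the BlockConstraint and bundle lookups into plain booleans):

#include <cstdio>

struct UseBlock {
  float Freq;             // block frequency
  bool LiveIn, LiveOut;   // is the value live across entry/exit?
  bool RegIn, RegOut;     // bundle solution: in a register at entry/exit?
  bool PrefIn, PrefOut;   // what the block's constraints prefer
};

// One spill instruction per broken entry or exit preference, each
// weighted by how often its block executes.
static float globalSplitCost(const UseBlock *B, int N) {
  float Cost = 0;
  for (int i = 0; i != N; ++i) {
    unsigned Ins = 0;
    if (B[i].LiveIn && B[i].RegIn != B[i].PrefIn) ++Ins;
    if (B[i].LiveOut && B[i].RegOut != B[i].PrefOut) ++Ins;
    Cost += Ins * B[i].Freq;
  }
  return Cost;
}

int main() {
  UseBlock Blocks[] = {{10.0f, true, true, false, true, true, true}};
  std::printf("cost = %.1f\n", globalSplitCost(Blocks, 1)); // cost = 10.0
}

Live-through blocks are charged the same way, with a doubled cost when a block has interference on both sides.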
- if (BI.LiveOut) { - IntI.advanceTo(Stop); - if (!IntI.valid() || IntI.start() >= Stop) - --IntI; - if (IntI.stop() <= Start) - continue; - if (!IP.second.isValid() || IntI.stop() > IP.second) - IP.second = IntI.stop(); - } - } - } - - SmallVector<LiveInterval*, 4> SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); // Create the main cross-block interval. - SE.openIntv(); + const unsigned MainIntv = SE->openIntv(); // First add all defs that are live out of a block. - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + // Create separate intervals for isolated blocks with multiple uses. + if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) { + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + SE->splitSingleBlock(BI); + SE->selectIntv(MainIntv); + continue; + } + // Should the register be live out? if (!BI.LiveOut || !RegOut) continue; - IndexPair &IP = InterferenceRanges[i]; SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - + Intf.moveToBlock(BI.MBB->getNumber()); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#" << Bundles->getBundle(BI.MBB->getNumber(), 1) - << " intf [" << IP.first << ';' << IP.second << ')'); + << " [" << Start << ';' + << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop + << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); // The interference interval should either be invalid or overlap MBB. - assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference"); - assert((!IP.second.isValid() || IP.second > Start) && "Bad interference"); + assert((!Intf.hasInterference() || Intf.first() < Stop) + && "Bad interference"); + assert((!Intf.hasInterference() || Intf.last() > Start) + && "Bad interference"); // Check interference leaving the block. - if (!IP.second.isValid()) { + if (!Intf.hasInterference()) { // Block is interference-free. DEBUG(dbgs() << ", no interference"); - if (!BI.Uses) { - assert(BI.LiveThrough && "No uses, but not live through block?"); - // Block is live-through without interference. - DEBUG(dbgs() << ", no uses" - << (RegIn ? ", live-through.\n" : ", stack in.\n")); - if (!RegIn) - SE.enterIntvAtEnd(*BI.MBB); - continue; - } if (!BI.LiveThrough) { DEBUG(dbgs() << ", not live-through.\n"); - SE.useIntv(SE.enterIntvBefore(BI.Def), Stop); + SE->useIntv(SE->enterIntvBefore(BI.Def), Stop); continue; } if (!RegIn) { // Block is live-through, but entry bundle is on the stack. // Reload just before the first use. DEBUG(dbgs() << ", not live-in, enter before first use.\n"); - SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop); + SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); continue; } DEBUG(dbgs() << ", live-through.\n"); @@ -725,53 +773,45 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, } // Block has interference. 
- DEBUG(dbgs() << ", interference to " << IP.second); + DEBUG(dbgs() << ", interference to " << Intf.last()); - if (!BI.LiveThrough && IP.second <= BI.Def) { + if (!BI.LiveThrough && Intf.last() <= BI.Def) { // The interference doesn't reach the outgoing segment. DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n'); - SE.useIntv(BI.Def, Stop); + SE->useIntv(BI.Def, Stop); continue; } - - if (!BI.Uses) { - // No uses in block, avoid interference by reloading as late as possible. - DEBUG(dbgs() << ", no uses.\n"); - SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB); - assert(SegStart >= IP.second && "Couldn't avoid interference"); - continue; - } - - if (IP.second.getBoundaryIndex() < BI.LastUse) { + SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); + if (Intf.last().getBoundaryIndex() < BI.LastUse) { // There are interference-free uses at the end of the block. // Find the first use that can get the live-out register. SmallVectorImpl<SlotIndex>::const_iterator UI = std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - IP.second.getBoundaryIndex()); + Intf.last().getBoundaryIndex()); assert(UI != SA->UseSlots.end() && "Couldn't find last use"); SlotIndex Use = *UI; assert(Use <= BI.LastUse && "Couldn't find last use"); // Only attempt a split before the last split point. - if (Use.getBaseIndex() <= BI.LastSplitPoint) { + if (Use.getBaseIndex() <= LastSplitPoint) { DEBUG(dbgs() << ", free use at " << Use << ".\n"); - SlotIndex SegStart = SE.enterIntvBefore(Use); - assert(SegStart >= IP.second && "Couldn't avoid interference"); - assert(SegStart < BI.LastSplitPoint && "Impossible split point"); - SE.useIntv(SegStart, Stop); + SlotIndex SegStart = SE->enterIntvBefore(Use); + assert(SegStart >= Intf.last() && "Couldn't avoid interference"); + assert(SegStart < LastSplitPoint && "Impossible split point"); + SE->useIntv(SegStart, Stop); continue; } } // Interference is after the last use. DEBUG(dbgs() << " after last use.\n"); - SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB); - assert(SegStart >= IP.second && "Couldn't avoid interference"); + SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB); + assert(SegStart >= Intf.last() && "Couldn't avoid interference"); } // Now all defs leading to live bundles are handled, do everything else. - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; @@ -780,152 +820,207 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, continue; // We have an incoming register. Check for interference. - IndexPair &IP = InterferenceRanges[i]; SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - + Intf.moveToBlock(BI.MBB->getNumber()); DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) - << " -> BB#" << BI.MBB->getNumber()); + << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';' + << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop + << ')'); // Check interference entering the block. - if (!IP.first.isValid()) { + if (!Intf.hasInterference()) { // Block is interference-free. DEBUG(dbgs() << ", no interference"); - if (!BI.Uses) { - assert(BI.LiveThrough && "No uses, but not live through block?"); - // Block is live-through without interference.
- if (RegOut) { - DEBUG(dbgs() << ", no uses, live-through.\n"); - SE.useIntv(Start, Stop); - } else { - DEBUG(dbgs() << ", no uses, stack-out.\n"); - SE.leaveIntvAtTop(*BI.MBB); - } - continue; - } if (!BI.LiveThrough) { DEBUG(dbgs() << ", killed in block.\n"); - SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill)); + SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill)); continue; } if (!RegOut) { + SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); // Block is live-through, but exit bundle is on the stack. // Spill immediately after the last use. - if (BI.LastUse < BI.LastSplitPoint) { + if (BI.LastUse < LastSplitPoint) { DEBUG(dbgs() << ", uses, stack-out.\n"); - SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse)); + SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); continue; } // The last use is after the last split point, it is probably an // indirect jump. DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " - << BI.LastSplitPoint << ", stack-out.\n"); - SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint); - SE.useIntv(Start, SegEnd); + << LastSplitPoint << ", stack-out.\n"); + SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint); + SE->useIntv(Start, SegEnd); // Run a double interval from the split to the last use. // This makes it possible to spill the complement without affecting the // indirect branch. - SE.overlapIntv(SegEnd, BI.LastUse); + SE->overlapIntv(SegEnd, BI.LastUse); continue; } // Register is live-through. DEBUG(dbgs() << ", uses, live-through.\n"); - SE.useIntv(Start, Stop); + SE->useIntv(Start, Stop); continue; } // Block has interference. - DEBUG(dbgs() << ", interference from " << IP.first); + DEBUG(dbgs() << ", interference from " << Intf.first()); - if (!BI.LiveThrough && IP.first >= BI.Kill) { + if (!BI.LiveThrough && Intf.first() >= BI.Kill) { // The interference doesn't reach the outgoing segment. DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); - SE.useIntv(Start, BI.Kill); + SE->useIntv(Start, BI.Kill); continue; } - if (!BI.Uses) { - // No uses in block, avoid interference by spilling as soon as possible. - DEBUG(dbgs() << ", no uses.\n"); - SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); - continue; - } - if (IP.first.getBaseIndex() > BI.FirstUse) { + if (Intf.first().getBaseIndex() > BI.FirstUse) { // There are interference-free uses at the beginning of the block. // Find the last use that can get the register. SmallVectorImpl<SlotIndex>::const_iterator UI = std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - IP.first.getBaseIndex()); + Intf.first().getBaseIndex()); assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); SlotIndex Use = (--UI)->getBoundaryIndex(); DEBUG(dbgs() << ", free use at " << *UI << ".\n"); - SlotIndex SegEnd = SE.leaveIntvAfter(Use); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); - SE.useIntv(Start, SegEnd); + SlotIndex SegEnd = SE->leaveIntvAfter(Use); + assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); + SE->useIntv(Start, SegEnd); continue; } // Interference is before the first use. DEBUG(dbgs() << " before first use.\n"); - SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); + SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); } - SE.closeIntv(); + // Handle live-through blocks. 
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + DEBUG(dbgs() << "Live through BB#" << Number << '\n'); + if (RegIn && RegOut) { + Intf.moveToBlock(Number); + if (!Intf.hasInterference()) { + SE->useIntv(Indexes->getMBBStartIdx(Number), + Indexes->getMBBEndIdx(Number)); + continue; + } + } + MachineBasicBlock *MBB = MF->getBlockNumbered(Number); + if (RegIn) + SE->leaveIntvAtTop(*MBB); + if (RegOut) + SE->enterIntvAtEnd(*MBB); + } - // FIXME: Should we be more aggressive about splitting the stack region into - // per-block segments? The current approach allows the stack region to - // separate into connected components. Some components may be allocatable. - SE.finish(); ++NumGlobalSplits; - if (VerifyEnabled) { - MF->verify(this, "After splitting live range around region"); + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + LRStage.resize(MRI->getNumVirtRegs()); + unsigned OrigBlocks = SA->getNumThroughBlocks() + SA->getUseBlocks().size(); + + // Sort out the new intervals created by splitting. We get four kinds: + // - Remainder intervals should not be split again. + // - Candidate intervals can be assigned to Cand.PhysReg. + // - Block-local splits are candidates for local splitting. + // - DCE leftovers should go back on the queue. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + unsigned Reg = LREdit.get(i)->reg; + + // Ignore old intervals from DCE. + if (LRStage[Reg] != RS_New) + continue; -#ifndef NDEBUG - // Make sure that at least one of the new intervals can allocate to PhysReg. - // That was the whole point of splitting the live range. - bool found = false; - for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E; - ++I) - if (!checkUncachedInterference(**I, PhysReg)) { - found = true; - break; + // Remainder interval. Don't try splitting again, spill if it doesn't + // allocate. + if (IntvMap[i] == 0) { + LRStage[Reg] = RS_Global; + continue; + } + + // Main interval. Allow repeated splitting as long as the number of live + // blocks is strictly decreasing. + if (IntvMap[i] == MainIntv) { + if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) { + DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks + << " blocks as original.\n"); + // Don't allow repeated splitting as a safeguard against looping. + LRStage[Reg] = RS_Global; } - assert(found && "No allocatable intervals after pointless splitting"); -#endif + continue; + } + + // Other intervals are treated as new. This includes local intervals created + // for blocks with multiple uses, and anything created by DCE.
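The interval-sorting loop just above decides whether each product of a region split may ever be split globally again, and the "strictly decreasing number of live blocks" test is what rules out infinite re-splitting: the count is bounded below by 1, so the main interval can only come back a finite number of times. A compilable sketch of that classification, using invented names (classify, Kind); only the stage semantics match the patch:

#include <cassert>

enum Kind { Remainder, MainCovering, MainShrunk, FreshLocal };

// IntvIdx 0 is the complement ("remainder") interval; MainIntv is the
// number returned by openIntv() above. All names here are invented.
static Kind classify(unsigned IntvIdx, unsigned MainIntv,
                     unsigned LiveBlocks, unsigned OrigBlocks) {
  if (IntvIdx == 0)
    return Remainder;               // gets RS_Global: spill if it won't allocate
  if (IntvIdx != MainIntv)
    return FreshLocal;              // block-local interval, stays RS_New
  // The main interval may only be split globally again if it got
  // strictly smaller; LiveBlocks >= 1 bounds the number of rounds.
  return LiveBlocks >= OrigBlocks ? MainCovering : MainShrunk;
}

int main() {
  assert(classify(0, 1, 5, 5) == Remainder);
  assert(classify(1, 1, 5, 5) == MainCovering); // gets RS_Global too
  assert(classify(1, 1, 3, 5) == MainShrunk);   // may split again
  assert(classify(2, 1, 1, 5) == FreshLocal);
  return 0;
}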
} + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around region"); } unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*> &NewVRegs) { - BitVector LiveBundles, BestBundles; - float BestCost = 0; - unsigned BestReg = 0; + float BestCost = Hysteresis * calcSpillCost(); + DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + const unsigned NoCand = ~0u; + unsigned BestCand = NoCand; + Order.rewind(); - while (unsigned PhysReg = Order.next()) { - float Cost = calcInterferenceInfo(VirtReg, PhysReg); - if (BestReg && Cost >= BestCost) + for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { + if (GlobalCand.size() <= Cand) + GlobalCand.resize(Cand+1); + GlobalCand[Cand].reset(PhysReg); + + SpillPlacer->prepare(GlobalCand[Cand].LiveBundles); + float Cost; + InterferenceCache::Cursor Intf(IntfCache, PhysReg); + if (!addSplitConstraints(Intf, Cost)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); + continue; + } + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + if (Cost >= BestCost) { + DEBUG({ + if (BestCand == NoCand) + dbgs() << " worse than no bundles\n"; + else + dbgs() << " worse than " + << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; + }); continue; + } + growRegion(GlobalCand[Cand], Intf); + + SpillPlacer->finish(); - SpillPlacer->placeSpills(SpillConstraints, LiveBundles); // No live bundles, defer to splitSingleBlocks(). - if (!LiveBundles.any()) + if (!GlobalCand[Cand].LiveBundles.any()) { + DEBUG(dbgs() << " no bundles.\n"); continue; + } - Cost += calcGlobalSplitCost(LiveBundles); - if (!BestReg || Cost < BestCost) { - BestReg = PhysReg; - BestCost = Cost; - BestBundles.swap(LiveBundles); + Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf); + DEBUG({ + dbgs() << ", total = " << Cost << " with bundles"; + for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0; + i = GlobalCand[Cand].LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + if (Cost < BestCost) { + BestCand = Cand; + BestCost = Hysteresis * Cost; // Prevent rounding effects. 
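The Hysteresis factor used in tryRegionSplit above is a stabilizer for float-based decisions: recording Hysteresis * Cost instead of Cost means a later candidate must beat the incumbent by a real margin, not just by rounding noise, before it can take over. A self-contained toy of the comparison; the 0.98 value is an assumption for illustration, the real constant is defined elsewhere in RegAllocGreedy.cpp:

#include <cstdio>

int main() {
  const float Hysteresis = 0.98f; // assumed value, for illustration only
  float BestCost = 10.0f;
  const float Candidates[] = {9.9999995f, 9.7f, 9.6f};
  for (float Cost : Candidates) {
    if (Cost >= BestCost) {
      std::printf("reject %g (not enough better than best)\n", Cost);
      continue;
    }
    std::printf("accept %g\n", Cost);
    BestCost = Hysteresis * Cost; // the next winner must be ~2% better
  }
  return 0;
}

Here the third candidate (9.6) loses even though it is numerically smaller than the second (9.7), because it does not clear the ~2% margin; that is exactly the oscillation the "Prevent rounding effects" comment is guarding against.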
} } - if (!BestReg) + if (BestCand == NoCand) return 0; - splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); + splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs); return 0; } @@ -942,8 +1037,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// void RAGreedy::calcGapWeights(unsigned PhysReg, SmallVectorImpl<float> &GapWeight) { - assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; const unsigned NumGaps = Uses.size()-1; @@ -1034,8 +1129,8 @@ unsigned RAGreedy::nextSplitPoint(unsigned i) { /// unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*> &NewVRegs) { - assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); // Note that it is possible to have an interval that is live-in or live-out // while only covering a single block - A phi-def can use undef values from @@ -1065,7 +1160,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned BestAfter = 0; float BestDiff = 0; - const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB); + const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()); SmallVector<float, 8> GapWeight; Order.rewind(); @@ -1130,13 +1225,13 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, PrevSlot[SplitBefore].distance(Uses[SplitAfter])); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. - float Diff = EstWeight - MaxGap; - DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); - if (Diff > 0) { + DEBUG(dbgs() << " w=" << EstWeight); + if (EstWeight * Hysteresis >= MaxGap) { Shrink = false; + float Diff = EstWeight - MaxGap; if (Diff > BestDiff) { DEBUG(dbgs() << " (best)"); - BestDiff = Diff; + BestDiff = Hysteresis * Diff; BestBefore = SplitBefore; BestAfter = SplitAfter; } @@ -1181,16 +1276,15 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - SmallVector<LiveInterval*, 4> SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); - SE.openIntv(); - SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]); - SlotIndex SegStop = SE.leaveIntvAfter(Uses[BestAfter]); - SE.useIntv(SegStart, SegStop); - SE.closeIntv(); - SE.finish(); + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); + SE->useIntv(SegStart, SegStop); + SE->finish(); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local); ++NumLocalSplits; return 0; @@ -1205,16 +1299,22 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. 
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*>&NewVRegs) { - SA->analyze(&VirtReg); - // Local intervals are handled separately. if (LIS->intervalIsInOneMBB(VirtReg)) { NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + SA->analyze(&VirtReg); return tryLocalSplit(VirtReg, Order, NewVRegs); } NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + // Don't iterate global splitting. + // Move straight to spilling if this range was produced by a global split. + if (getStage(VirtReg) >= RS_Global) + return 0; + + SA->analyze(&VirtReg); + // First try to split around a region spanning multiple blocks. unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) @@ -1223,9 +1323,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // Then isolate blocks with multiple uses. SplitAnalysis::BlockPtrSet Blocks; if (SA->getMultiUseBlocks(Blocks)) { - SmallVector<LiveInterval*, 4> SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); + SE->splitSingleBlocks(Blocks); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global); if (VerifyEnabled) MF->verify(this, "After splitting live range around basic blocks"); } @@ -1236,68 +1337,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, //===----------------------------------------------------------------------===// -// Spilling -//===----------------------------------------------------------------------===// - -/// calcInterferenceWeight - Calculate the combined spill weight of -/// interferences when assigning VirtReg to PhysReg. -float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){ - float Sum = 0; - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AI); - Q.collectInterferingVRegs(); - if (Q.seenUnspillableVReg()) - return HUGE_VALF; - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) - Sum += Q.interferingVRegs()[i]->weight; - } - return Sum; -} - -/// trySpillInterferences - Try to spill interfering registers instead of the -/// current one. Only do it if the accumulated spill weight is smaller than the -/// current spill weight. -unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg, - AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { - NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled); - unsigned BestPhys = 0; - float BestWeight = 0; - - Order.rewind(); - while (unsigned PhysReg = Order.next()) { - float Weight = calcInterferenceWeight(VirtReg, PhysReg); - if (Weight == HUGE_VALF || Weight >= VirtReg.weight) - continue; - if (!BestPhys || Weight < BestWeight) - BestPhys = PhysReg, BestWeight = Weight; - } - - // No candidates found. - if (!BestPhys) - return 0; - - // Collect all interfering registers. - SmallVector<LiveInterval*, 8> Spills; - for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AI); - Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end()); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *VReg = Q.interferingVRegs()[i]; - unassign(*VReg, *AI); - } - } - - // Spill them all. 
- DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight " - << BestWeight << '\n'); - for (unsigned i = 0, e = Spills.size(); i != e; ++i) - spiller().spill(Spills[i], NewVRegs, Spills); - return BestPhys; -} - - -//===----------------------------------------------------------------------===// // Main Entry Point //===----------------------------------------------------------------------===// @@ -1305,12 +1344,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - if (!checkPhysRegInterference(VirtReg, PhysReg)) - return PhysReg; - } - - if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) @@ -1321,25 +1355,29 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. - if (Generation[VirtReg.reg] == 1) { + LiveRangeStage Stage = getStage(VirtReg); + if (Stage == RS_First) { + LRStage[VirtReg.reg] = RS_Second; + DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(&VirtReg); return 0; } + assert(Stage < RS_Spill && "Cannot allocate after spilling"); + // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) return PhysReg; - // Try to spill another interfering reg with less spill weight. - PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs); - if (PhysReg) - return PhysReg; - // Finally spill VirtReg itself. NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - SmallVector<LiveInterval*, 1> pendingSpills; - spiller().spill(&VirtReg, NewVRegs, pendingSpills); + LiveRangeEdit LRE(VirtReg, NewVRegs, this); + spiller().spill(LRE); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill); + + if (VerifyEnabled) + MF->verify(this, "After spilling"); // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. @@ -1366,6 +1404,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis<SpillPlacement>(); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); + LRStage.clear(); + LRStage.resize(MRI->getNumVirtRegs()); + IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); allocatePhysRegs(); addMBBLiveIns(MF); @@ -1377,6 +1419,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { VRM->rewrite(Indexes); } + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + // The pass output is in VirtRegMap. Release all the transient data. 
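Several hunks above (the RS_First deferral in selectOrSplit, the getStage(VirtReg) >= RS_Global early-out in trySplit, and the Stage < RS_Spill assert) only make sense together because the live-range stages are ordered by allocator progress. A sketch of that ordering, inferred from the comparisons in this patch rather than quoted from the source:

#include <cassert>

// Ordering inferred from the guards in these hunks; the authoritative
// definition lives in RegAllocGreedy.cpp itself.
enum LiveRangeStage {
  RS_New,    // never seen by selectOrSplit
  RS_First,  // first visit: defer until smaller ranges are allocated
  RS_Second, // second visit: eviction and splitting are allowed
  RS_Global, // produced by a region/block split: no more global splits
  RS_Local,  // produced by a local split
  RS_Spill   // produced by spilling: must never be queued again
};

int main() {
  assert(RS_Local >= RS_Global); // local splits also skip tryRegionSplit
  assert(RS_Global < RS_Spill);  // "Cannot allocate after spilling"
  return 0;
}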
releaseMemory(); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index b959878..5ef88cb 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "regalloc" #include "LiveDebugVariables.h" +#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" #include "Spiller.h" @@ -39,7 +40,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <set> #include <queue> #include <memory> #include <cmath> @@ -66,6 +66,11 @@ TrivCoalesceEnds("trivial-coalesce-ends", cl::desc("Attempt trivial coalescing of interval ends"), cl::init(false), cl::Hidden); +static cl::opt<bool> +AvoidWAWHazard("avoid-waw-hazard", + cl::desc("Avoid write-write hazards for some register classes"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -109,6 +114,7 @@ namespace { if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); RecentNext = RecentRegs.begin(); + avoidWAW_ = 0; } typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; @@ -179,6 +185,9 @@ namespace { SmallVector<unsigned, 4> RecentRegs; SmallVector<unsigned, 4>::iterator RecentNext; + // Last write-after-write register written. + unsigned avoidWAW_; + // Record that we just picked this register. void recordRecentlyUsed(unsigned reg) { assert(reg != 0 && "Recently used register is NOREG!"); @@ -226,8 +235,8 @@ namespace { // Determine if we skip this register due to its being recently used. bool isRecentlyUsed(unsigned reg) const { - return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != - RecentRegs.end(); + return reg == avoidWAW_ || + std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); } private: @@ -374,7 +383,7 @@ namespace { dbgs() << str << " intervals:\n"; for (; i != e; ++i) { - dbgs() << "\t" << *i->first << " -> "; + dbgs() << '\t' << *i->first << " -> "; unsigned reg = i->first->reg; if (TargetRegisterInfo::isVirtualRegister(reg)) @@ -389,7 +398,7 @@ namespace { } INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) + "Linear Scan Register Allocator", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) @@ -400,7 +409,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) + "Linear Scan Register Allocator", false, false) void RALinScan::ComputeRelatedRegClasses() { // First pass, add all reg classes to the union, and determine at least one @@ -458,7 +467,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { const LiveRange &range = cur.ranges.front(); VNInfo *vni = range.valno; - if (vni->isUnused()) + if (vni->isUnused() || !vni->def.isValid()) return Reg; unsigned CandReg; @@ -571,7 +580,7 @@ void RALinScan::initIntervalSets() for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty()) { + if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { mri_->setPhysRegUsed(i->second->reg); 
fixed_.push_back(std::make_pair(i->second, i->second->begin())); } @@ -791,7 +800,7 @@ void RALinScan::updateSpillWeights(std::vector<float> &Weights, // register class we are trying to allocate. Then add the weight to all // sub-registers of the super-register even if they are not aliases. // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be choosen + // bl should get the same spill weight otherwise it will be chosen // as a spill candidate since spilling bh doesn't make ebx available. for (unsigned i = 0, e = Supers.size(); i != e; ++i) { for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) @@ -993,7 +1002,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // one, e.g. X86::mov32to32_. These move instructions are not coalescable. if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { VNInfo *vni = cur->begin()->valno; - if (!vni->isUnused()) { + if (!vni->isUnused() && vni->def.isValid()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); if (CopyMI && CopyMI->isCopy()) { unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); @@ -1109,11 +1118,18 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // list. if (physReg) { DEBUG(dbgs() << tri_->getName(physReg) << '\n'); + assert(RC->contains(physReg) && "Invalid candidate"); vrm_->assignVirt2Phys(cur->reg, physReg); addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); handled_.push_back(cur); + // Remember physReg for avoiding a write-after-write hazard in the next + // instruction. + if (AvoidWAWHazard && + tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) + avoidWAW_ = physReg; + // "Upgrade" the physical register since it has been allocated. UpgradeRegister(physReg); if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { @@ -1229,8 +1245,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // linearscan. if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector<LiveInterval*, 8> spillIs, added; - spiller_->spill(cur, added, spillIs); + SmallVector<LiveInterval*, 8> added; + LiveRangeEdit LRE(*cur, added); + spiller_->spill(LRE); std::sort(added.begin(), added.end(), LISorter()); if (added.empty()) @@ -1306,7 +1323,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); - spiller_->spill(sli, added, spillIs); + LiveRangeEdit LRE(*sli, added, 0, &spillIs); + spiller_->spill(LRE); spilled.insert(sli->reg); } @@ -1442,7 +1460,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; // Skip recently allocated registers. 
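The avoid-waw-hazard pieces in this file (the cl::opt, the avoidWAW_ member, and the extra test in isRecentlyUsed above) form a one-register heuristic: after assigning from a class whose back-to-back writes stall (reported by tri_->avoidWriteAfterWrite), that register counts as recently used for the next allocation, steering consecutive defs onto different physical registers. A toy model with an invented two-register class:

#include <cassert>

// Invented model: pick the lowest-numbered register of {R0, R1} that is
// not the last one written, mimicking the avoidWAW_ check above.
static unsigned pick(unsigned AvoidWAW) {
  for (unsigned Reg = 0; Reg < 2; ++Reg)
    if (Reg != AvoidWAW)
      return Reg;
  return 0; // only one register in the class: hazard is unavoidable
}

int main() {
  unsigned AvoidWAW = ~0u;     // nothing written yet
  unsigned A = pick(AvoidWAW); // R0
  AvoidWAW = A;
  unsigned B = pick(AvoidWAW); // R1: consecutive defs land on different regs
  assert(A != B);
  return 0;
}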
- if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { + if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; if (FreeReg < inactiveCounts.size()) FreeRegInactiveCount = inactiveCounts[FreeReg]; @@ -1473,7 +1491,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { + FreeRegInactiveCount < inactiveCounts[Reg] && + (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) @@ -1524,12 +1543,10 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { return Preference; } - if (!DowngradedRegs.empty()) { - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - } + unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index ea0d1fe..1e1f1e0 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -534,10 +534,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, vregsToAlloc.erase(vreg); const LiveInterval* spillInterval = &lis->getInterval(vreg); double oldWeight = spillInterval->weight; - SmallVector<LiveInterval*, 8> spillIs; rmf->rememberUseDefs(spillInterval); std::vector<LiveInterval*> newSpills = - lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); + lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm); addStackInterval(spillInterval, mri); rmf->rememberSpills(spillInterval, newSpills); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index a2580b8..ebfe533 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -126,9 +126,10 @@ void RegScavenger::forward() { MBBI = MBB->begin(); Tracking = true; } else { - assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + assert(MBBI != MBB->end() && "Already past the end of the basic block!"); MBBI = llvm::next(MBBI); } + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); MachineInstr *MI = MBBI; @@ -241,12 +242,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { /// getRegsAvailable - Return all available registers in the register class /// in Mask. -void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC, - BitVector &Mask) { +BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { + BitVector Mask(TRI->getNumRegs()); for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) if (!isAliasUsed(*I)) Mask.set(*I); + return Mask; } /// findSurvivorReg - Return the candidate register that is unused for the @@ -335,9 +337,13 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, } // Try to find a register that's unused if there is one, as then we won't - // have to spill. - if ((Candidates & RegsAvailable).any()) - Candidates &= RegsAvailable; + // have to spill. Search explicitly rather than masking out based on + // RegsAvailable, as RegsAvailable does not take aliases into account. + // That's what getRegsAvailable() is for. 
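The comment above is the heart of the RegisterScavenging fix: RegsAvailable is a plain per-register mask, so intersecting Candidates with it can report a register as free while one of its aliases is live; getRegsAvailable() instead re-asks isAliasUsed() for every register. A toy model of the difference; the four-register file and its alias table are invented:

#include <bitset>
#include <cstdio>

// Hypothetical 4-register file: reg 0 overlaps regs 1 and 2 (think
// AX/AL/AH); reg 3 is independent. ~0u terminates each alias list.
static const unsigned Aliases[4][3] = {
    {1, 2, ~0u}, {0, ~0u, ~0u}, {0, ~0u, ~0u}, {~0u, ~0u, ~0u}};

static bool isAliasUsed(const std::bitset<4> &Used, unsigned Reg) {
  if (Used.test(Reg))
    return true;
  for (const unsigned *A = Aliases[Reg]; *A != ~0u; ++A)
    if (Used.test(*A))
      return true;
  return false;
}

int main() {
  std::bitset<4> Used;
  Used.set(1); // the "AL" half is live
  // A per-register mask would call reg 0 free; the alias-aware query
  // correctly says it is not.
  std::printf("naive: reg0 %s\n", Used.test(0) ? "busy" : "free");
  std::printf("alias-aware: reg0 %s\n",
              isAliasUsed(Used, 0) ? "busy" : "free");
  return 0;
}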
+ BitVector Available = getRegsAvailable(RC); + + if ((Candidates & Available).any()) + Candidates &= Available; // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index cbfd5a2..c8de382 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -47,7 +47,7 @@ outputFileSuffix("rmf-file-suffix", static cl::opt<std::string> machineFuncsToRender("rmf-funcs", - cl::desc("Coma seperated list of functions to render" + cl::desc("Comma separated list of functions to render" ", or \"*\"."), cl::init(""), cl::Hidden); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 3388889..1302395 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -472,7 +472,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { #endif } -/// AddPred - Updates the topological ordering to accomodate an edge +/// AddPred - Updates the topological ordering to accommodate an edge /// to be added from SUnit X to SUnit Y. void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { int UpperBound, LowerBound; @@ -490,7 +490,7 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { } } -/// RemovePred - Updates the topological ordering to accomodate an +/// RemovePred - Updates the topological ordering to accommodate an /// an edge to be removed from the specified node N from the predecessors /// of the current node M. void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index f17023e..67c209e 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -371,7 +371,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // will be overlapped by work done outside the current // scheduling region. Latency -= std::min(Latency, Count); - // Add the artifical edge. + // Add the artificial edge. ExitSU.addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 027f615..4b55a22 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -51,7 +51,8 @@ namespace llvm { /// If you want to override the dot attributes printed for a particular /// edge, override this method. 
static std::string getEdgeAttributes(const SUnit *Node, - SUnitIterator EI) { + SUnitIterator EI, + const ScheduleDAG *Graph) { if (EI.isArtificialDep()) return "color=cyan,style=dashed"; if (EI.isCtrlDep()) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9cc70a3..f427511 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -319,6 +319,10 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { ((DAGCombiner*)DC)->AddToWorkList(N); } +void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { + ((DAGCombiner*)DC)->removeFromWorkList(N); +} + SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); @@ -1290,6 +1294,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } +/// isCarryMaterialization - Returns true if V is an ADDE node that is known to +/// return 0 or 1 depending on the carry flag. +static bool isCarryMaterialization(SDValue V) { + if (V.getOpcode() != ISD::ADDE) + return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0)); + return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1); +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1453,6 +1467,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } + // add (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), + N0.getOperand(2)); + + // add X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), + N1.getOperand(2)); + return SDValue(); } @@ -1496,6 +1522,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { N->getDebugLoc(), MVT::Glue)); } + // addc (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, + DAG.getConstant(0, VT), N0.getOperand(2)); + + // addc X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, + DAG.getConstant(0, VT), N1.getOperand(2)); + return SDValue(); } @@ -1506,6 +1542,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + // If both operands are null we know that carry out will always be false. 
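The two visitADD folds and the two visitADDC folds above, plus the visitADDE change that continues below, all lean on one identity: (adde 0, 0, glue) materializes the incoming carry as 0 or 1, so adding that value to X computes the same result as (adde X, 0, glue) with the carry fed in directly, and when both operands are the same null constant the carry-out is necessarily false. A quick check of the value half of the identity (the carry-out is deliberately not modeled):

#include <cassert>

// Models only the result value of ISD::ADDE.
static unsigned adde(unsigned A, unsigned B, bool CarryIn) {
  return A + B + (CarryIn ? 1u : 0u);
}

int main() {
  const unsigned X = 42;
  for (int C = 0; C <= 1; ++C) {
    bool CarryIn = (C != 0);
    // add (adde 0, 0, glue), X  ==  adde X, 0, glue
    assert(X + adde(0, 0, CarryIn) == adde(X, 0, CarryIn));
  }
  return 0;
}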
+ if (N0C && N0C->isNullValue() && N0 == N1) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), + MVT::Glue)); + // canonicalize constant to RHS if (N0C && !N1C) return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), @@ -3281,8 +3323,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + uint64_t ShiftAmt = N1C->getZExtValue(); SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); + N0.getOperand(0), + DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); } @@ -3688,7 +3732,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (load x)) -> (sext (truncate (sextload x))) // None of the supported targets knows how to perform load and sign extend - // in one instruction. We only perform this transformation on scalars. + // on vectors in one instruction. We only perform this transformation on + // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { @@ -3839,7 +3884,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -3892,7 +3937,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (load x)) -> (zext (truncate (zextload x))) // None of the supported targets knows how to perform load and vector_zext - // in one instruction. We only perform this transformation on scalar zext. + // on vectors in one instruction. We only perform this transformation on + // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { @@ -4066,7 +4112,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4101,7 +4147,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (load x)) -> (aext (truncate (extload x))) // None of the supported targets knows how to perform load and any_ext - // in one instruction. We only perform this transformation on scalars. + // on vectors in one instruction. We only perform this transformation on + // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { @@ -4514,7 +4561,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y - // Currenly we only perform this optimization on scalars because vectors + // Currently we only perform this optimization on scalars because vectors // may have different active low bits. 
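The visitSRL change above fixes a type bug rather than a value bug: once the shift is narrowed to SmallVT, the shift amount must be a fresh constant of getShiftAmountTy(SmallVT), not the original N1, whose type was chosen for the wide VT. The value-level identity the narrowing relies on, modeled here with zero-extension standing in for any_extend (whose high bits are unspecified anyway):

#include <cassert>
#include <cstdint>

int main() {
  // zext32(u16 >> c) == zext32(u16) >> c for c < 16: the extended high
  // bits are zero, so shifting before or after widening is the same.
  const uint16_t Vals[] = {0x0000, 0xABCD, 0xFFFF};
  for (unsigned C = 0; C < 16; ++C)
    for (uint16_t X : Vals)
      assert(uint32_t(uint16_t(X >> C)) == (uint32_t(X) >> C));
  return 0;
}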
if (!VT.isVector()) { SDValue Shorter = @@ -5101,7 +5148,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128) + if (N0C && OpVT != MVT::ppcf128 && + // ...but only if the target supports immediate floating-point values + (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -5123,7 +5172,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128) + if (N0C && OpVT != MVT::ppcf128 && + // ...but only if the target supports immediate floating-point values + (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, @@ -5817,8 +5868,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // value. // TODO: Handle store large -> read small portion. // TODO: Handle TRUNCSTORE/LOADEXT - if (LD->getExtensionType() == ISD::NON_EXTLOAD && - !LD->isVolatile()) { + if (ISD::isNormalLoad(N) && !LD->isVolatile()) { if (ISD::isNON_TRUNCStore(Chain.getNode())) { StoreSDNode *PrevST = cast<StoreSDNode>(Chain); if (PrevST->getBasePtr() == Ptr && @@ -6217,6 +6267,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isNonTemporal(), OrigAlign); } + // Turn 'store undef, Ptr' -> nothing. + if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + return Chain; + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { // NOTE: If the original store is volatile, this transform must not increase @@ -6250,8 +6304,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); - } else if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 490b857..3af9482 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -43,6 +43,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -121,10 +122,9 @@ unsigned FastISel::getRegForValue(const Value *V) { // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. 
DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) { - unsigned Reg = I->second; - return Reg; - } + if (I != FuncInfo.ValueMap.end()) + return I->second; + unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; @@ -164,8 +164,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - // Try to emit the constant directly. - Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + if (CF->isNullValue()) { + Reg = TargetMaterializeFloatZero(CF); + } else { + // Try to emit the constant directly. + Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + } if (!Reg) { // Try to emit the constant by using an integer constant with a cast. @@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { return false; } + // Check if the first operand is a constant, and handle it as "ri". At -O0, + // we don't have anything that canonicalizes operand order. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) return false; + + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, + Op1IsKill, CI->getZExtValue(), + VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op0IsKill, - CI->getZExtValue()); - if (ResultReg != 0) { - // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); - return true; + uint64_t Imm = CI->getZExtValue(); + + // Transform "sdiv exact X, 8" -> "sra X, 3". + if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && + cast<BinaryOperator>(I)->isExact() && + isPowerOf2_64(Imm)) { + Imm = Log2_64(Imm); + ISDOpcode = ISD::SRA; } + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Op0IsKill, Imm, VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; } // Check if the second operand is a constant float. @@ -454,15 +486,35 @@ bool FastISel::SelectGetElementPtr(const User *I) { } bool FastISel::SelectCall(const User *I) { - const Function *F = cast<CallInst>(I)->getCalledFunction(); + const CallInst *Call = cast<CallInst>(I); + + // Handle simple inline asms. + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) { + // Don't attempt to handle constraints. 
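Back in the SelectBinaryOp hunk above, the "sdiv exact X, 8" to "sra X, 3" rewrite is sound precisely because the exact flag promises a zero remainder, so the usual disagreement between truncating division and arithmetic shift on negative operands cannot occur. A check of the identity; note that >> on negative signed values is implementation-defined before C++20, though it is arithmetic on the mainstream targets in question:

#include <cassert>
#include <cstdint>

int main() {
  // Exact multiples of 8 only, mirroring the 'exact' flag's guarantee.
  for (int64_t X = -64; X <= 64; X += 8)
    assert(X / 8 == (X >> 3));
  return 0;
}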
+ if (!IA->getConstraintString().empty()) + return false; + + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::INLINEASM)) + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); + return true; + } + + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. - unsigned IID = F->getIntrinsicID(); - switch (IID) { + switch (F->getIntrinsicID()) { default: break; case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || !FuncInfo.MF->getMMI().hasDebugInfo()) return true; @@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) { } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. - const DbgValueInst *DI = cast<DbgValueInst>(I); + const DbgValueInst *DI = cast<DbgValueInst>(Call); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { @@ -523,65 +575,58 @@ bool FastISel::SelectCall(const User *I) { return true; } case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { - default: break; - case TargetLowering::Expand: { - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(I, ResultReg); - return true; - } - } - break; + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) + break; + + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + UpdateValueMap(Call, ResultReg); + return true; } case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { - default: break; - case TargetLowering::Expand: { - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) + break; + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); + else { #ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(cast<CallInst>(I)); + FuncInfo.CatchInfoLost.insert(Call); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - + // FIXME: Mark exception selector register as live in. Hack for PR1508. 
unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(I); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; + if (Reg) FuncInfo.MBB->addLiveIn(Reg); + } - UpdateValueMap(I, ResultReg); + unsigned Reg = TLI.getExceptionSelectorRegister(); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + + bool ResultRegIsKill = hasTrivialKill(Call); + + // Cast the register to the type of the selector. + if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg, ResultRegIsKill); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; - return true; - } - } - break; + UpdateValueMap(Call, ResultReg); + + return true; } } @@ -966,59 +1011,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT, unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType) { + // If this is a multiply by a power of two, emit this as a shift left. + if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { + Opcode = ISD::SHL; + Imm = Log2_64(Imm); + } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) { + // div x, 8 -> srl x, 3 + Opcode = ISD::SRL; + Imm = Log2_64(Imm); + } + + // Horrible hack (to be removed), check to make sure shift amounts are + // in-range. + if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) && + Imm >= VT.getSizeInBits()) + return 0; + // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - if (MaterialReg == 0) - return 0; - return FastEmit_rr(VT, VT, Opcode, - Op0, Op0IsKill, - MaterialReg, /*Kill=*/true); -} - -/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries -/// to emit an instruction with a floating-point immediate operand using -/// FastEmit_rf. If that fails, it materializes the immediate into a register -/// and try FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm, MVT ImmType) { - // First check if immediate type is legal. If not, we can't use the rf form. - unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); - if (ResultReg != 0) - return ResultReg; - - // Materialize the constant in a register. 
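FastEmit_ri_ above now applies two classic strength reductions before trying any instruction format, plus a clamp that rejects out-of-range shift amounts instead of emitting an illegal node. The arithmetic being relied on, for the power-of-two case k = 3:

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t X = 0; X < 4096; X += 123) {
    assert(X * 8 == (X << 3)); // mul X, 2^k  -> shl X, k
    assert(X / 8 == (X >> 3)); // udiv X, 2^k -> srl X, k (unsigned only)
  }
  return 0;
}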
- unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm); if (MaterialReg == 0) { - // If the target doesn't have a way to directly enter a floating-point - // value into a register, use an alternate approach. - // TODO: The current approach only supports floating-point constants - // that can be constructed by conversion from integer values. This should - // be replaced by code that creates a load from a constant-pool entry, - // which will require some target-specific work. - const APFloat &Flt = FPImm->getValueAPF(); - EVT IntVT = TLI.getPointerTy(); - - uint64_t x[2]; - uint32_t IntBitWidth = IntVT.getSizeInBits(); - bool isExact; - (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); - if (!isExact) - return 0; - APInt IntVal(IntBitWidth, 2, x); - - unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::Constant, IntVal.getZExtValue()); - if (IntegerReg == 0) - return 0; - MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, - ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); - if (MaterialReg == 0) - return 0; + // This is a bit ugly/slow, but failing here means falling out of + // fast-isel, which would be very slow. + const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), + VT.getSizeInBits()); + MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, @@ -1099,6 +1118,29 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm1) + .addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm1) + .addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -1160,6 +1202,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { @@ -1215,7 +1274,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its - // own moves. 
Second, this check is necessary becuase FastISel doesn't + // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 2ae3286..d8a5770 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -448,16 +448,30 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, +void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { - // Apply the catch info to DestBB. - AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); + SmallPtrSet<const BasicBlock*, 4> Visited; + + // The 'eh.selector' call may not be in the direct successor of a basic block, + // but could be several successors deeper. If we don't find it, try going one + // level further. <rdar://problem/8824861> + while (Visited.insert(SuccBB)) { + for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); + I != E; ++I) + if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + // Apply the catch info to LPad. + AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); #ifndef NDEBUG - if (!FLI.MBBMap[SrcBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); + if (!FLI.MBBMap[SuccBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); #endif - } + return; + } + + const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator()); + if (Br && Br->isUnconditional()) + SuccBB = Br->getSuccessor(0); + else + break; + } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f08528f..2b6c56e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -61,10 +61,10 @@ class SelectionDAGLegalize { // Libcall insertion helpers. - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been + /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been /// legalized. We use this to ensure that calls are properly serialized /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; + SmallVector<SDValue, 8> LastCALLSEQ; enum LegalizeAction { Legal, // The target natively supports this operation. 
@@ -142,6 +142,9 @@ private: DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, @@ -153,6 +156,7 @@ private: RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); + void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); @@ -178,6 +182,15 @@ private: void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); } + void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; } + void pushLastCALLSEQ(SDValue s) { + LastCALLSEQ.push_back(s); + } + void popLastCALLSEQ() { + LastCALLSEQ.pop_back(); + } }; } @@ -223,7 +236,7 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); + pushLastCALLSEQ(DAG.getEntryNode()); // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -251,14 +264,15 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. 
+ int next_depth = depth; if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) + next_depth = depth + 1; + if (Node->getOpcode() == ISD::CALLSEQ_END) { + assert(depth > 0 && "negative depth!"); + if (depth == 1) return Node; + else + next_depth = depth - 1; } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -289,7 +303,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) + if (SDNode *Result = FindCallEndFromCallStart(User, next_depth)) return Result; } return 0; @@ -786,7 +800,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } } } - return SDValue(); + return SDValue(0, 0); } /// LegalizeOp - We know that the specified value has a legal type, and @@ -934,11 +948,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + // Branches tweak the chain to include LastCALLSEQ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); + getLastCALLSEQ()); Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); + setLastCALLSEQ(DAG.getEntryNode()); break; case ISD::SHL: case ISD::SRL: @@ -948,7 +963,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[1])); break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: @@ -956,7 +972,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2])); + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[2])); break; } @@ -1024,8 +1041,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case ISD::CALLSEQ_START: { - static int depth = 0; SDNode *CallEnd = FindCallEndFromCallStart(Node); + assert(CallEnd && "didn't find CALLSEQ_END!"); // Recursively Legalize all of the inputs of the call end that do not lead // to this call start. This ensures that any libcalls that need be inserted @@ -1042,9 +1059,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) { + if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); + Tmp1, getLastCALLSEQ()); Tmp1 = LegalizeOp(Tmp1); } @@ -1065,29 +1082,28 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - - SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ; // Note that we are selecting this call! 
- LastCALLSEQ_END = SDValue(CallEnd, 0); + setLastCALLSEQ(SDValue(CallEnd, 0)); - depth++; // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - depth--; - assert(depth >= 0 && "Un-matched CALLSEQ_START?"); - if (depth > 0) - LastCALLSEQ_END = Saved_LastCALLSEQ_END; + LegalizeOp(getLastCALLSEQ()); return Result; } case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; + { + SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node); + + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. + if (getLastCALLSEQ().getNode() != Node) { + LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0)); } // Otherwise, the call start has been legalized and everything is going @@ -1116,6 +1132,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } } // This finishes up call legalization. + popLastCALLSEQ(); + // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); if (Node->getNumValues() == 2) @@ -2035,9 +2053,43 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return DAG.getRoot(); // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). 
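Taken together, the CALLSEQ_START and CALLSEQ_END cases above keep LastCALLSEQ as a stack with a simple discipline; a sketch of the invariant, inferred from the surrounding hunks:

// LegalizeDAG()      -> pushLastCALLSEQ(EntryNode)         stack size == 1
// CALLSEQ_END case   -> pushLastCALLSEQ(its CALLSEQ_START) while the nested
//                       sequence is being legalized
// sequence finished  -> popLastCALLSEQ()                   size shrinks again
// The asserts on BR/BR_JT/BR_CC/BRCOND that check LastCALLSEQ.size() == 1
// verify exactly that no branch is legalized while a call sequence is open.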
LegalizeOp(CallInfo.second); + return CallInfo.first; } @@ -2072,7 +2124,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); return CallInfo; @@ -2112,6 +2164,113 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } +/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != 0; +} + +/// UseDivRem - Only issue divrem libcall if both quotient and remainder are +/// needed. +static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { + unsigned OtherOpcode = 0; + if (isSigned) + OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; + else + OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + if (User->getOpcode() == OtherOpcode && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) + return true; + } + return false; +} + +/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem +/// pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. 
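The routine whose body continues below, ExpandDivRemLibCall, folds a quotient/remainder pair over the same operands into one runtime call: the quotient is the call's return value and the remainder comes back through a pointer to a stack temporary. A scalar sketch of the rewrite; __divmodsi4 is illustrative of a libgcc-style divmod helper, not a name taken from this diff:

extern "C" int __divmodsi4(int a, int b, int *rem);  // hypothetical helper

void divrem(int a, int b, int &q, int &r) {
  int rem;                       // the stack temporary (FIPtr below)
  q = __divmodsi4(a, b, &rem);   // one call computes both results
  r = rem;                       // remainder is loaded back from the slot
}

The UseDivRem predicate above ensures this path fires only when both the division and the remainder of the same operands are actually requested.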
+ SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + // Also pass the return address of the remainder. + SDValue FIPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = FIPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + DebugLoc dl = Node->getDebugLoc(); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + + // Remainder is loaded back from the stack frame. + SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr, + MachinePointerInfo(), false, false, 0); + Results.push_back(CallInfo.first); + Results.push_back(Rem); +} + /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are @@ -2759,7 +2918,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::FP_ROUND_INREG: { // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targetting a temporary location (a stack slot). + // EXTLOAD pair, targeting a temporary location (a stack slot). // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create @@ -3085,24 +3244,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) { + } else if (isSigned) Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32, RTLIB::SREM_I64, RTLIB::SREM_I128); - } else { + else Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32, RTLIB::UREM_I64, RTLIB::UREM_I128); - } Results.push_back(Tmp1); break; } @@ -3112,7 +3272,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); else if (isSigned) @@ -3141,6 +3303,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1.getValue(1)); break; } + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3225,6 +3392,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -3242,7 +3410,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3255,7 +3422,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; @@ -3266,15 +3432,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); - RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); - - SDValue Ret = ExpandLibCall(LC, Node, isSigned); - BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); - TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, - DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); - TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); + + // The high part is obtained by SRA'ing all but one of the bits of low + // part. + unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture and thus has to be split to + // two arguments. 
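To make the comment above concrete: when WideVT (say i128) is not a legal type, each wide operand is pre-split into an explicit (lo, hi) pair, and for the signed case the high half of the sign-extension is just the sign bit spread across a word. A scalar sketch for 64-bit inputs; mul128 stands in for the RTLIB::MUL_I128 routine, and __int128 plus the arithmetic right shift of a negative value are compiler/target assumptions used only for illustration:

#include <cstdint>

__int128 mul128(int64_t LoL, int64_t HiL, int64_t LoR, int64_t HiR);

void smul_parts(int64_t LHS, int64_t RHS, int64_t &Bottom, int64_t &Top) {
  int64_t HiLHS = LHS >> 63;               // all sign bits of LHS
  int64_t HiRHS = RHS >> 63;               // all sign bits of RHS
  __int128 Ret = mul128(LHS, HiLHS, RHS, HiRHS);
  Bottom = (int64_t)Ret;                   // EXTRACT_ELEMENT 0
  Top    = (int64_t)(Ret >> 64);           // EXTRACT_ELEMENT 1
}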
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1)); } + if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); @@ -3409,7 +3587,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + setLastCALLSEQ(DAG.getEntryNode()); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f0752df..935aab0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1051,8 +1051,6 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; - case ISD::UMULO: - case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1428,9 +1426,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, HiOps[2] = Lo.getValue(1); Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); } - return; + return; } - + if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); @@ -2128,31 +2126,6 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } -void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N, - SDValue &Lo, SDValue &Hi) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); - EVT VT = N->getValueType(0); - EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2); - // Expand the result by simply replacing it with the equivalent - // non-overflow-checking operation. - SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); - SplitInteger(Ret, Lo, Hi); - - // Now calculate overflow. 
- SDValue Ofl; - if (N->getOpcode() == ISD::UMULO) - Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, - DAG.getConstant(0, VT), ISD::SETNE); - else { - SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT); - Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp); - Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE); - } - ReplaceValueWith(SDValue(N, 1), Ofl); -} - void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 3f81bbb..5409b88 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -348,7 +348,6 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -523,6 +522,7 @@ private: SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); @@ -566,7 +566,6 @@ private: void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 167dbe0..5d0f923 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -58,6 +58,9 @@ class VectorLegalizer { SDValue UnrollVSETCC(SDValue Op); // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB // isn't legal. + // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if + // SINT_TO_FLOAT and SHR on vectors isn't legal. + SDValue ExpandUINT_TO_FLOAT(SDValue Op); SDValue ExpandFNEG(SDValue Op); // Implements vector promotion; this is essentially just bitcasting the // operands to a different type and bitcasting the result back to the @@ -207,7 +210,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::FNEG) + if (Node->getOpcode() == ISD::UINT_TO_FP) + Result = ExpandUINT_TO_FLOAT(Op); + else if (Node->getOpcode() == ISD::FNEG) Result = ExpandFNEG(Op); else if (Node->getOpcode() == ISD::VSETCC) Result = UnrollVSETCC(Op); @@ -251,6 +256,48 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } +SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { + + + EVT VT = Op.getOperand(0).getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + // Make sure that the SINT_TO_FP and SRL instructions are available. 
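The expansion body that follows implements a simple identity: split each unsigned lane into half-words, convert both halves with the signed converter (both halves are non-negative, so the signed conversion is exact), then recombine with a multiply and an add. The scalar picture for 32-bit lanes:

#include <cstdint>

// uint_to_fp(x) == sint_to_fp(x >> 16) * 2^16 + sint_to_fp(x & 0xFFFF)
float uint32_to_float(uint32_t x) {
  int32_t hi = (int32_t)(x >> 16);       // upper half-word, at most 0xFFFF
  int32_t lo = (int32_t)(x & 0xFFFFu);   // lower half-word, at most 0xFFFF
  return (float)hi * 65536.0f + (float)lo;
}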
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRL, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + EVT SVT = VT.getScalarType(); + assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && + "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + + unsigned BW = SVT.getSizeInBits(); + SDValue HalfWord = DAG.getConstant(BW/2, VT); + + // Constants to clear the upper part of the word. + // Notice that we can also use SHL+SHR, but using a constant is slightly + // faster on x86. + uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; + SDValue HalfWordMask = DAG.getConstant(HWMask, VT); + + // Two to the power of half-word-size. + SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType()); + + // Clear upper part of LO, lower HI + SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); + + // Convert hi and lo to floats + // Convert the hi part back to the upper values + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); + fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); + + // Add the two halves + return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); +} + + SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 182f8fc..0b4dd35 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -50,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: R = N->getOperand(0); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; @@ -63,27 +64,33 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; + case ISD::ANY_EXTEND: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: case ISD::FABS: + case ISD::FCEIL: case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::FRINT: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: - case ISD::FFLOOR: - case ISD::FCEIL: - case ISD::FRINT: - case ISD::FNEARBYINT: - case ISD::UINT_TO_FP: + case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: R = ScalarizeVecRes_UnaryOp(N); break; @@ -145,6 +152,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { N->getOperand(0), N->getOperand(1)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { + EVT NewVT = 
N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + NewVT, Op, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), @@ -405,11 +419,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; - case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; @@ -427,32 +439,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::CTTZ: + case ISD::ANY_EXTEND: + case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTPOP: - case ISD::FNEG: + case ISD::CTTZ: case ISD::FABS: - case ISD::FSQRT: - case ISD::FSIN: + case ISD::FCEIL: case ISD::FCOS: - case ISD::FTRUNC: + case ISD::FEXP: + case ISD::FEXP2: case ISD::FFLOOR: - case ISD::FCEIL: - case ISD::FRINT: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - case ISD::FEXP: - case ISD::FEXP2: - case ISD::FLOG: - case ISD::FLOG2: - case ISD::FLOG10: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -587,60 +602,6 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); } -void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, - SDValue &Hi) { - EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - - SDValue DTyOpLo = DAG.getValueType(LoVT); - SDValue DTyOpHi = DAG.getValueType(HiVT); - - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - - // Split the input. - SDValue VLo, VHi; - EVT InVT = N->getOperand(0).getValueType(); - switch (getTypeAction(InVT)) { - default: llvm_unreachable("Unexpected type action!"); - case Legal: { - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); - VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case SplitVector: - GetSplitVector(N->getOperand(0), VLo, VHi); - break; - case WidenVector: { - // If the result needs to be split and the input needs to be widened, - // the two types must have different lengths. 
Use the widened result - // and extract from it to do the split. - SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - } - - SDValue STyOpLo = DAG.getValueType(VLo.getValueType()); - SDValue STyOpHi = DAG.getValueType(VHi.getValueType()); - - Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp, - CvtCode); - Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp, - CvtCode); -} - void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -840,8 +801,25 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, } } - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + if (N->getOpcode() == ISD::FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); + SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + } } void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, @@ -989,11 +967,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FP_EXTEND: case ISD::FTRUNC: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: @@ -1270,15 +1248,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Shift(N); break; + case ISD::ANY_EXTEND: + case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: Res = WidenVecRes_Convert(N); break; @@ -1286,15 +1265,20 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: case ISD::CTTZ: case ISD::FABS: + case ISD::FCEIL: case ISD::FCOS: - case ISD::FNEG: - case ISD::FSIN: - case ISD::FSQRT: case ISD::FEXP: case ISD::FEXP2: + case ISD::FFLOOR: case ISD::FLOG: - case ISD::FLOG2: case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; } @@ -2004,7 +1988,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; - case 
ISD::FP_ROUND: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index e3da208..7b560d1 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -570,13 +570,20 @@ void ScheduleDAGFast::ListScheduleBottomUp() { TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. + // If cross copy register class is the same as RC, then it must be + // possible to copy the value directly. Do not try to duplicate the def. + // If cross copy register class is not the same as RC, then it's + // possible to copy the value but it requires cross register class copies + // and it is expensive. + // If cross copy register class is null, then it's not possible to copy + // the value at all. SUnit *NewDef = 0; - if (DestRC) + if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; + if (!DestRC && !NewDef) + report_fatal_error("Can't handle live physical " + "register dependency!"); + } if (!NewDef) { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 0b548b2..88bd450 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -70,6 +70,50 @@ static cl::opt<bool> DisableSchedCycles( "disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling")); +// Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. +static cl::opt<bool> DisableSchedRegPressure( + "disable-sched-reg-pressure", cl::Hidden, cl::init(false), + cl::desc("Disable regpressure priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedLiveUses( + "disable-sched-live-uses", cl::Hidden, cl::init(true), + cl::desc("Disable live use priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedVRegCycle( + "disable-sched-vrcycle", cl::Hidden, cl::init(false), + cl::desc("Disable virtual register cycle interference checks")); +static cl::opt<bool> DisableSchedPhysRegJoin( + "disable-sched-physreg-join", cl::Hidden, cl::init(false), + cl::desc("Disable physreg def-use affinity")); +static cl::opt<bool> DisableSchedStalls( + "disable-sched-stalls", cl::Hidden, cl::init(true), + cl::desc("Disable no-stall priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedCriticalPath( + "disable-sched-critical-path", cl::Hidden, cl::init(false), + cl::desc("Disable critical path priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedHeight( + "disable-sched-height", cl::Hidden, cl::init(false), + cl::desc("Disable scheduled-height priority in sched=list-ilp")); + +static cl::opt<int> MaxReorderWindow( + "max-sched-reorder", cl::Hidden, cl::init(6), + cl::desc("Number of instructions to allow ahead of the critical path " + "in sched=list-ilp")); + +static cl::opt<unsigned> AvgIPC( + "sched-avg-ipc", cl::Hidden, cl::init(1), + cl::desc("Average inst/cycle when no target itinerary exists.")); + +#ifndef NDEBUG +namespace { + // For sched=list-ilp, count the number of times each factor comes into play.
+ enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, + FactStatic, FactOther, NumFactors }; +} +static const char *FactorName[NumFactors] = +{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; +static int FactorCount[NumFactors]; +#endif //!NDEBUG + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -103,6 +147,10 @@ private: /// MinAvailableCycle - Cycle of the soonest available instruction. unsigned MinAvailableCycle; + /// IssueCount - Count instructions issued in this cycle + /// Currently valid only for bottom-up scheduling. + unsigned IssueCount; + /// LiveRegDefs - A set of physical registers and their definition /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. @@ -234,8 +282,14 @@ void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() << " '" << BB->getName() << "' **********\n"); +#ifndef NDEBUG + for (int i = 0; i < NumFactors; ++i) { + FactorCount[i] = 0; + } +#endif //!NDEBUG CurCycle = 0; + IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); @@ -258,6 +312,11 @@ void ScheduleDAGRRList::Schedule() { else ListScheduleTopDown(); +#ifndef NDEBUG + for (int i = 0; i < NumFactors; ++i) { + DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); + } +#endif // !NDEBUG AvailableQueue->releaseState(); } @@ -295,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { if (Height < MinAvailableCycle) MinAvailableCycle = Height; - if (isReady(SU)) { + if (isReady(PredSU)) { AvailableQueue->push(PredSU); } // CapturePred and others may have left the node in the pending queue, avoid @@ -383,6 +442,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { if (NextCycle <= CurCycle) return; + IssueCount = 0; AvailableQueue->setCurCycle(NextCycle); if (!HazardRec->isEnabled()) { // Bypass lots of virtual calls in case of long latency. @@ -407,6 +467,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { if (DisableSchedCycles) return; + // FIXME: Nodes such as CopyFromReg probably should not advance the current + // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node + // has predecessors the cycle will be advanced when they are scheduled. + // But given the crude nature of modeling latency though such nodes, we + // currently need to treat these nodes like real instructions. + // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); // Bump CurCycle to account for latency. We assume the latency of other @@ -477,6 +544,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { } } +static void resetVRegCycle(SUnit *SU); + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. @@ -486,12 +555,13 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { #ifndef NDEBUG if (CurCycle < SU->getHeight()) - DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); + DEBUG(dbgs() << " Height [" << SU->getHeight() + << "] pipeline stall!\n"); #endif // FIXME: Do not modify node height. It may interfere with // backtracking. 
Instead add a "ready cycle" to SUnit. Before scheduling the - node it's ready cycle can aid heuristics, and after scheduling it can + node its ready cycle can aid heuristics, and after scheduling it can // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); @@ -502,6 +572,12 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { AvailableQueue->ScheduledNode(SU); + // If HazardRec is disabled, and each inst counts as one cycle, then + // advance CurCycle before ReleasePredecessors to avoid useless pushes to + // PendingQueue for schedulers that implement HasReadyFilter. + if (!HazardRec->isEnabled() && AvgIPC < 2) + AdvanceToCycle(CurCycle + 1); + // Update liveness of predecessors before successors to avoid treating a // two-address node as a live range def. ReleasePredecessors(SU); @@ -518,16 +594,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } } + resetVRegCycle(SU); + SU->isScheduled = true; // Conditions under which the scheduler should eagerly advance the cycle: // (1) No available instructions // (2) All pipelines full, so available instructions must have hazards. // - // If HazardRec is disabled, count each inst as one cycle. - if (!HazardRec->isEnabled() || HazardRec->atIssueLimit() - || AvailableQueue->empty()) - AdvanceToCycle(CurCycle + 1); + // If HazardRec is disabled, the cycle was pre-advanced before calling + // ReleasePredecessors. In that case, IssueCount should remain 0. + // + // Check AvailableQueue after ReleasePredecessors in case of zero latency. + if (HazardRec->isEnabled() || AvgIPC > 1) { + if (SU->getNode() && SU->getNode()->isMachineOpcode()) + ++IssueCount; + if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) + || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) + AdvanceToCycle(CurCycle + 1); + } } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -872,6 +957,15 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, AddPred(SuccSU, D); DelDeps.push_back(std::make_pair(SuccSU, *I)); } + else { + // Avoid scheduling the def-side copy before other successors. Otherwise + // we could introduce another physreg interference on the copy and + // continue inserting copies indefinitely. + SDep D(CopyFromSU, SDep::Order, /*Latency=*/0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true); + AddPred(SuccSU, D); + } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); @@ -1077,13 +1171,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. + // If cross copy register class is the same as RC, then it must be possible to + // copy the value directly. Do not try to duplicate the def. + // If cross copy register class is not the same as RC, then it's possible to + // copy the value but it requires cross register class copies and it is + // expensive. + // If cross copy register class is null, then it's not possible to copy + // the value at all. SUnit *NewDef = 0; - if (DestRC) + if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; + if (!DestRC && !NewDef) + report_fatal_error("Can't handle live physical register dependency!"); + } if (!NewDef) { // Issue copies, these can be expensive cross register class copies.
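Since the same getCrossCopyRegClass contract is now spelled out in both ScheduleDAGFast and ScheduleDAGRRList, the three-way outcome is worth tabulating; this restates the comments above, it is not new behavior:

// DestRC == RC : the value is directly copyable; skip duplication and
//                fall through to issuing the copies.
// DestRC != RC : copyable only through expensive cross-register-class
//                copies; try CopyAndMoveSuccessors (duplicate the def)
//                first, and fall back to copies only if that fails.
// DestRC == 0  : not copyable at all; if duplication also fails,
//                report_fatal_error on the live physreg dependency.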
SmallVector<SUnit*, 2> Copies; @@ -1139,7 +1239,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - DEBUG(dbgs() << "\n*** Examining Available\n"; + DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); // Pick the best node to schedule taking all constraints into @@ -1318,7 +1418,7 @@ struct src_ls_rr_sort : public queue_sort { struct hybrid_ls_rr_sort : public queue_sort { enum { IsBottomUp = true, - HasReadyFilter = true + HasReadyFilter = false }; RegReductionPQBase *SPQ; @@ -1337,7 +1437,7 @@ struct hybrid_ls_rr_sort : public queue_sort { struct ilp_ls_rr_sort : public queue_sort { enum { IsBottomUp = true, - HasReadyFilter = true + HasReadyFilter = false }; RegReductionPQBase *SPQ; @@ -1395,7 +1495,7 @@ public: std::fill(RegPressure.begin(), RegPressure.end(), 0); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); } } @@ -1422,6 +1522,8 @@ public: unsigned getNodePriority(const SUnit *SU) const; unsigned getNodeOrdering(const SUnit *SU) const { + if (!SU->getNode()) return 0; + return scheduleDAG->DAG->GetOrdering(SU->getNode()); } @@ -1450,7 +1552,9 @@ public: bool HighRegPressure(const SUnit *SU) const; - bool MayReduceRegPressure(SUnit *SU); + bool MayReduceRegPressure(SUnit *SU) const; + + int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; void ScheduledNode(SUnit *SU); @@ -1538,6 +1642,20 @@ ILPBURRPriorityQueue; // Static Node Priority for Register Pressure Reduction //===----------------------------------------------------------------------===// +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. static unsigned @@ -1576,17 +1694,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() { CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); } -void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. - CalculateSethiUllmanNumbers(); -} - void RegReductionPQBase::addNode(const SUnit *SU) { unsigned SUSize = SethiUllmanNumbers.size(); if (SUnits->size() > SUSize) @@ -1625,7 +1732,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { // If SU does not have a register def, schedule it close to its uses // because it does not lengthen any live ranges. return 0; +#if 1 return SethiUllmanNumbers[SU->NodeNum]; +#else + unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; + if (SU->isCallOp) { + // FIXME: This assumes all of the defs are used as call operands. 
+ int NP = (int)Priority - SU->getNode()->getNumValues(); + return (NP > 0) ? NP : 0; + } + return Priority; +#endif } //===----------------------------------------------------------------------===// @@ -1670,7 +1787,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { return false; } -bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { const SDNode *N = SU->getNode(); if (!N->isMachineOpcode() || !SU->NumSuccs) @@ -1688,10 +1805,60 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { return false; } +// Compute the register pressure contribution by this instruction by count up +// for uses that are not live and down for defs. Only count register classes +// that are already under high pressure. As a side effect, compute the number of +// uses of registers that are already live. +// +// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure +// so could probably be factored. +int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { + LiveUses = 0; + int PDiff = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + if (PredSU->getNode()->isMachineOpcode()) + ++LiveUses; + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + ++PDiff; + } + } + const SDNode *N = SU->getNode(); + + if (!N || !N->isMachineOpcode() || !SU->NumSuccs) + return PDiff; + + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + --PDiff; + } + return PDiff; +} + void RegReductionPQBase::ScheduledNode(SUnit *SU) { if (!TracksRegPressure) return; + if (!SU->getNode()) + return; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) @@ -1758,6 +1925,8 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) { return; const SDNode *N = SU->getNode(); + if (!N) return; + if (!N->isMachineOpcode()) { if (N->getOpcode() != ISD::CopyToReg) return; @@ -1871,7 +2040,29 @@ static unsigned calcMaxScratches(const SUnit *SU) { return Scratches; } -/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are +/// CopyFromReg from a virtual register. 
+static bool hasOnlyLiveInOpers(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *PredSU = I->getSUnit(); + if (PredSU->getNode() && + PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = + cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// hasOnlyLiveOutUses - Return true if SU has only value successors that are /// CopyToReg to a virtual register. This SU def is probably a liveout and /// it has no other use. It should be scheduled closer to the terminator. static bool hasOnlyLiveOutUses(const SUnit *SU) { @@ -1893,20 +2084,67 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { return RetVal; } -/// UnitsSharePred - Return true if the two scheduling units share a common -/// data predecessor. -static bool UnitsSharePred(const SUnit *left, const SUnit *right) { - SmallSet<const SUnit*, 4> Preds; - for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); +// Set isVRegCycle for a node with only live in opers and live out uses. Also +// set isVRegCycle for its CopyFromReg operands. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. +static void initVRegCycle(SUnit *SU) { + if (DisableSchedVRegCycle) + return; + + if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) + return; + + DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + + SU->isVRegCycle = true; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + I->getSUnit()->isVRegCycle = true; + } +} + +// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of +// CopyFromReg operands. We should no longer penalize other uses of this VReg. +static void resetVRegCycle(SUnit *SU) { + if (!SU->isVRegCycle) + return; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - Preds.insert(I->getSUnit()); + SUnit *PredSU = I->getSUnit(); + if (PredSU->isVRegCycle) { + assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && + "VRegCycle def must be CopyFromReg"); + I->getSUnit()->isVRegCycle = 0; + } } - for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); +} + +// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This +// means a node that defines the VRegCycle has not been scheduled yet. +static bool hasVRegCycleUse(const SUnit *SU) { + // If this SU also defines the VReg, don't hoist it as a "use". 
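The pattern initVRegCycle looks for is easiest to see in source form; a hedged C++ example of the kind of single-block loop it targets (any loop whose induction update feeds straight back into the loop PHI would do):

// The decrement of 'i' reads only the loop-carried virtual register
// (live in via CopyFromReg) and is used only by the loop-carried copy
// back (live out via CopyToReg), so its node is marked isVRegCycle;
// prioritizing the other uses of 'i' lets the allocator coalesce the
// pre/post values and drop a copy inside the loop.
void clear_backwards(int *buf, unsigned n) {
  for (unsigned i = n; i != 0; --i)
    buf[i - 1] = 0;
}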
+ if (SU->isVRegCycle) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - if (Preds.count(I->getSUnit())) + if (I->getSUnit()->isVRegCycle && + I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); return true; + } } return false; } @@ -1926,23 +2164,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { // Return 0 if latency-based priority is equivalent. static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ) { - // If the two nodes share an operand and one of them has a single - // use that is a live out copy, favor the one that is live out. Otherwise - // it will be difficult to eliminate the copy if the instruction is a - // loop induction variable update. e.g. - // BB: - // sub r1, r3, #1 - // str r0, [r2, r3] - // mov r3, r1 - // cmp - // bne BB - bool SharePred = UnitsSharePred(left, right); - // FIXME: Only adjust if BB is a loop back edge. - // FIXME: What's the cost of a copy? - int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; - int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; - int LHeight = (int)left->getHeight() - LBonus; - int RHeight = (int)right->getHeight() - RBonus; + // Scheduling an instruction that uses a VReg whose postincrement has not yet + // been scheduled will induce a copy. Model this as an extra cycle of latency. + int LPenalty = hasVRegCycleUse(left) ? 1 : 0; + int RPenalty = hasVRegCycleUse(right) ? 1 : 0; + int LHeight = (int)left->getHeight() + LPenalty; + int RHeight = (int)right->getHeight() + RPenalty; bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && BUHasStall(left, LHeight, SPQ); @@ -1953,45 +2180,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) + if (!RStall) { + DEBUG(++FactorCount[FactStall]); return 1; - if (LHeight != RHeight) + } + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactStall]); return LHeight > RHeight ? 1 : -1; - } else if (RStall) + } + } else if (RStall) { + DEBUG(++FactorCount[FactStall]); return -1; + } // If either node is scheduling for latency, sort them by height/depth // and latency. if (!checkPref || (left->SchedulingPref == Sched::Latency || right->SchedulingPref == Sched::Latency)) { if (DisableSchedCycles) { - if (LHeight != RHeight) + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactHeight]); return LHeight > RHeight ? 1 : -1; + } } else { // If neither instruction stalls (!LStall && !RStall) then - // it's height is already covered so only its depth matters. We also reach + // its height is already covered so only its depth matters. We also reach // this if both stall but have the same height. - unsigned LDepth = left->getDepth(); - unsigned RDepth = right->getDepth(); + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { + DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) + if (left->Latency != right->Latency) { + DEBUG(++FactorCount[FactOther]); return left->Latency > right->Latency ? 
1 : -1; + } } return 0; } static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { + // Schedule physical register definitions close to their use. This is + // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as + // long as shortening physreg live ranges is generally good, we can defer + // creating a subtarget hook. + if (!DisableSchedPhysRegJoin) { + bool LHasPhysReg = left->hasPhysRegDefs; + bool RHasPhysReg = right->hasPhysRegDefs; + if (LHasPhysReg != RHasPhysReg) { + DEBUG(++FactorCount[FactRegUses]); + #ifndef NDEBUG + const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; + #endif + DEBUG(dbgs() << "  SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " + << PhysRegMsg[RHasPhysReg] << "\n"); + return LHasPhysReg < RHasPhysReg; + } + } + + // Prioritize by Sethi-Ullman number and push CopyToReg nodes down. unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); + + // Be really careful about hoisting call operands above previous calls. + // Only allow it if it would reduce register pressure. + if (left->isCall && right->isCallOp) { + unsigned RNumVals = right->getNode()->getNumValues(); + RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; + } + if (right->isCall && left->isCallOp) { + unsigned LNumVals = left->getNode()->getNumValues(); + LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; + } + + if (LPriority != RPriority) { + DEBUG(++FactorCount[FactStatic]); return LPriority > RPriority; + } + + // If one or both of the nodes are calls and their Sethi-Ullman numbers are + // the same, then keep source order. + if (left->isCall || right->isCall) { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + } // Try schedule def + use closer when Sethi-Ullman numbers are the same. // e.g. @@ -2012,40 +2296,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) + if (LDist != RDist) { + DEBUG(++FactorCount[FactOther]); return LDist < RDist; + } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) + if (LScratch != RScratch) { + DEBUG(++FactorCount[FactOther]); return LScratch > RScratch; + } + + // Comparing latency against a call makes little sense unless the node + // is register pressure-neutral. + if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) + return (left->NodeQueueId > right->NodeQueueId); - if (!DisableSchedCycles) { + // Do not compare latencies when one or both of the nodes are calls.
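For orientation, BURRSort above now applies its tie-breaks in a fixed cascade; a summary read off the hunk, not an authoritative specification:

// BURRSort precedence after this patch, highest first:
//  1. physreg defs near their uses         (unless DisableSchedPhysRegJoin)
//  2. Sethi-Ullman priority, discounted when a call operand competes
//     with a call
//  3. source order when calls are involved
//  4. closestSucc: keep a def and its use close together
//  5. calcMaxScratches: prefer making fewer registers live
//  6. latency/height/depth (latency comparison is skipped when either
//     node is a call)
//  7. NodeQueueId as the final deterministic tie-break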
+ if (!DisableSchedCycles && + !(left->isCall || right->isCall)) { int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); if (result != 0) return result > 0; } else { - if (left->getHeight() != right->getHeight()) + if (left->getHeight() != right->getHeight()) { + DEBUG(++FactorCount[FactHeight]); return left->getHeight() > right->getHeight(); + } - if (left->getDepth() != right->getDepth()) + if (left->getDepth() != right->getDepth()) { + DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); + } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); + DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } // Bottom up bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + return BURRSort(left, right, SPQ); } // Source order, otherwise bottom up. bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -2077,6 +2383,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { // Return true if right should be scheduled with higher priority than left. bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2086,16 +2395,18 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; } - else if (!LHigh && !RHigh) { + if (!LHigh && !RHigh) { int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); if (result != 0) return result > 0; @@ -2112,34 +2423,118 @@ bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { != ScheduleHazardRecognizer::NoHazard) return false; - return SU->getHeight() <= CurCycle; + return true; } +static bool canEnableCoalescing(SUnit *SU) { + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return true; + + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return true; + + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return true; + + return false; +} + +// list-ilp is currently an experimental scheduler that allows various +// heuristics to be enabled prior to the normal register reduction logic. 
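A note on the comparator style used throughout this hunk, before the ilp_ls_rr_sort operator that follows: every operator() here is a strict weak ordering handed to a priority queue, built as a chain of tie-breakers that ends in the NodeQueueId comparison so the result can never depend on container order. A minimal, self-contained sketch of that shape, using hypothetical stand-in fields rather than the real SUnit API:

#include <queue>
#include <vector>

// Hypothetical stand-in for an SUnit; only what the sketch needs.
struct Node { int Priority; int Height; int QueueId; };

// Bottom-up convention, as in the comparators above: returning true
// means the left node should be scheduled *later* than the right one.
struct BottomUpOrder {
  bool operator()(const Node &L, const Node &R) const {
    if (L.Priority != R.Priority)
      return L.Priority > R.Priority; // lower static priority first
    if (L.Height != R.Height)
      return L.Height > R.Height;     // then lower height, a latency proxy
    return L.QueueId > R.QueueId;     // deterministic last resort
  }
};

int main() {
  std::priority_queue<Node, std::vector<Node>, BottomUpOrder> Q;
  Q.push({2, 5, 1});
  Q.push({2, 7, 2});
  Q.push({1, 9, 3});
  return Q.top().QueueId; // 3: the priority-1 node survives every tie-break
}

The ilp_ls_rr_sort comparator below layers its pressure, live-use, stall, and depth heuristics in exactly this pattern before falling back to BURRSort.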
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); - bool LHigh = SPQ->HighRegPressure(left); - bool RHigh = SPQ->HighRegPressure(right); - // Avoid causing spills. If register pressure is high, schedule for - // register pressure reduction. - if (LHigh && !RHigh) - return true; - else if (!LHigh && RHigh) - return false; - else if (!LHigh && !RHigh) { - // Low register pressure situation, schedule to maximize instruction level - // parallelism. - if (left->NumPreds > right->NumPreds) - return false; - else if (left->NumPreds < right->NumPreds) - return false; + unsigned LLiveUses = 0, RLiveUses = 0; + int LPDiff = 0, RPDiff = 0; + if (!DisableSchedRegPressure || !DisableSchedLiveUses) { + LPDiff = SPQ->RegPressureDiff(left, LLiveUses); + RPDiff = SPQ->RegPressureDiff(right, RLiveUses); + } + if (!DisableSchedRegPressure && LPDiff != RPDiff) { + DEBUG(++FactorCount[FactPressureDiff]); + DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff + << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); + return LPDiff > RPDiff; + } + + if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { + bool LReduce = canEnableCoalescing(left); + bool RReduce = canEnableCoalescing(right); + DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); + if (LReduce && !RReduce) return false; + if (RReduce && !LReduce) return true; + } + + if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { + DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); + DEBUG(++FactorCount[FactRegUses]); + return LLiveUses < RLiveUses; + } + + if (!DisableSchedStalls) { + bool LStall = BUHasStall(left, left->getHeight(), SPQ); + bool RStall = BUHasStall(right, right->getHeight(), SPQ); + if (LStall != RStall) { + DEBUG(++FactorCount[FactHeight]); + return left->getHeight() > right->getHeight(); + } + } + + if (!DisableSchedCriticalPath) { + int spread = (int)left->getDepth() - (int)right->getDepth(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum << "): " + << right->getDepth() << "\n"); + DEBUG(++FactorCount[FactDepth]); + return left->getDepth() < right->getDepth(); + } + } + + if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { + int spread = (int)left->getHeight() - (int)right->getHeight(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(++FactorCount[FactHeight]); + return left->getHeight() > right->getHeight(); + } } return BURRSort(left, right, SPQ); } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. 
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { + for (unsigned i = 0, e = sunits.size(); i != e; ++i) { + initVRegCycle(&sunits[i]); + } + } +} + //===----------------------------------------------------------------------===// // Preschedule for Register Pressure //===----------------------------------------------------------------------===// @@ -2417,6 +2812,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, // Top down bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res < 0; + unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 477c1ff..9f2f012 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -27,12 +27,21 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); +// This allows the latency-based scheduler to notice high latency instructions +// without a target itinerary. The choice of number here has more to do with +// balancing scheduler heuristics than with the actual machine latency. +static cl::opt<int> HighLatencyCycles( + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency' " "instructions take for targets with no itinerary")); + ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf), InstrItins(mf.getTarget().getInstrItineraryData()) {} @@ -72,11 +81,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isVRegCycle = Old->isVRegCycle; SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; SU->SchedulingPref = Old->SchedulingPref; Old->isCloned = true; return SU; } @@ -273,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); + SmallVector<SUnit*, 8> CallSUnits; while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); @@ -325,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { if (!HasGlueUse) break; } + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. + if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + // If there are glue operands involved, N is now the bottom-most node // of the sequence of nodes that are glued together. // Update the SUnit. @@ -338,6 +361,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Assign the Latency field of NodeSUnit using target-provided information. ComputeLatency(NodeSUnit); } + + // Find all call operands. 
+ while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } } void ScheduleDAGSDNodes::AddSchedEdges() { @@ -403,6 +440,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { @@ -410,11 +451,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } - if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { + if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as // a single use, so to keep pressure balanced we reduce the defs. + // + // We can't tell (without more book-keeping) if this results from + // glued nodes or duplicate operands. As long as we don't reduce + // NumRegDefsLeft to zero, we handle the common cases well. --OpSU->NumRegDefsLeft; } } @@ -437,6 +482,10 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Initialize NumNodeDefs for the current Node's opcode. void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + // Check for phys reg copy. + if (!Node) + return; + if (!Node->isMachineOpcode()) { if (Node->getOpcode() == ISD::CopyFromReg) NodeNumDefs = 1; @@ -499,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { SU->Latency = 1; @@ -506,7 +565,11 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } if (!InstrItins || InstrItins->isEmpty()) { - SU->Latency = 1; + if (N && N->isMachineOpcode() && + TII->isHighLatencyDef(N->getMachineOpcode())) + SU->Latency = HighLatencyCycles; + else + SU->Latency = 1; return; } @@ -573,7 +636,7 @@ namespace { }; } -/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index cc7310e..b5f68f3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -80,6 +80,12 @@ namespace llvm { /// flagged together nodes with a single SUnit. 
virtual void BuildSchedGraph(AliasAnalysis *AA); + /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is + /// CopyToReg and its only active data operands are CopyFromReg within a + /// single block loop. + /// + void InitVRegCycleFlag(SUnit *SU); + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void InitNumRegDefsLeft(SUnit *SU); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9120288..c2711c8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1418,9 +1418,9 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. -SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { +SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - MVT ShTy = TLI.getShiftAmountTy(OpTy); + MVT ShTy = TLI.getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -2482,6 +2482,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // sext(undef) = 0, because the top bits will all be the same. + return getConstant(0, VT); break; case ISD::ZERO_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2496,6 +2499,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // zext(undef) = 0, because the top bits will be zero. + return getConstant(0, VT); break; case ISD::ANY_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2512,6 +2518,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); // (ext (trunx x)) -> x if (OpOpcode == ISD::TRUNCATE) { @@ -5904,7 +5912,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bit_convert"; + case ISD::BITCAST: return "bitcast"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -6226,6 +6234,9 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, return; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; OS << '\n'; printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); } @@ -6238,7 +6249,7 @@ void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { // Don't print impossibly deep things. 
- printrWithDepth(OS, G, 100); + printrWithDepth(OS, G, 10); } void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { @@ -6247,7 +6258,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { void SDNode::dumprFull(const SelectionDAG *G) const { // Don't print impossibly deep things. - dumprWithDepth(G, 100); + dumprWithDepth(G, 10); } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { @@ -6311,7 +6322,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { case ISD::ROTL: case ISD::ROTR: Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], - getShiftAmountOperand(Operands[1]))); + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 48d9bbb..b02a7b6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -50,7 +50,6 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -84,9 +83,7 @@ LimitFPPrecision("limit-float-precision", // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer -static cl::opt<unsigned> -MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), - cl::init(64), cl::Hidden); +static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, @@ -1130,15 +1127,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { else if (F->paramHasAttr(0, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; - // FIXME: C calling convention requires the return type to be promoted - // to at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) + VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); @@ -1153,9 +1143,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Flags.setInReg(); // Propagate extension type if any - if (F->paramHasAttr(0, Attribute::SExt)) + if (ExtendKind == ISD::SIGN_EXTEND) Flags.setSExt(); - else if (F->paramHasAttr(0, Attribute::ZExt)) + else if (ExtendKind == ISD::ZERO_EXTEND) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { @@ -2029,9 +2019,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize.roundToDouble() / + // Use volatile double here to avoid excess precision issues on some hosts, + // e.g. that use 80-bit X87 registers. 
+ volatile double LDensity = + (double)LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize.roundToDouble() / + volatile double RDensity = + (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place @@ -4039,10 +4033,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBB; - if (MBB != &MF.front()) - return false; - unsigned Reg = 0; if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. @@ -4413,7 +4403,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_dispatch_setup: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot(), getValue(I.getArgOperand(0)))); + getRoot())); return 0; } @@ -4682,9 +4672,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; - case Intrinsic::trap: - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + case Intrinsic::trap: { + StringRef TrapFuncName = getTrapFunctionName(); + if (TrapFuncName.empty()) { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> Result = + TLI.LowerCallTo(getRoot(), I.getType(), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + Args, DAG, getCurDebugLoc()); + DAG.setRoot(Result.second); return 0; + } case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: @@ -4937,15 +4940,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size())); - } - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. - if (Result.second.getNode()) - DAG.setRoot(Result.second); - else + // Assign order to nodes here. If the call does not produce a result, it won't + // be mapped to a SDNode and visit() will not assign it an order number. + if (!Result.second.getNode()) { + // As a special case, a null chain means that a tail call has been emitted and + // the DAG root is already updated. HasTailCall = true; + ++SDNodeOrder; + AssignOrderingToNode(DAG.getRoot().getNode()); + } else { + DAG.setRoot(Result.second); + ++SDNodeOrder; + AssignOrderingToNode(Result.second.getNode()); + } if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This @@ -5211,12 +5220,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(&I, Callee, I.isTailCall()); } -namespace llvm { +namespace { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. -class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : - public TargetLowering::AsmOperandInfo { +class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber /// this is null, otherwise it is the incoming operand to the CallInst. 
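An aside on the volatile qualifier just introduced in handleBTSplitSwitchCase: on hosts whose x87 FPU evaluates double arithmetic in 80-bit registers, the same quotient can round differently depending on whether the compiler happened to spill it to memory, so two densities that ought to compare equal may not. Forcing each value through a volatile 64-bit slot rounds both the same way. A small self-contained illustration with hypothetical values; the mismatch is only observable on an x87 host (e.g. 32-bit x86 built without SSE math):

#include <cstdio>

double densityA(double Size, double Range) {
  return Size / Range;              // may stay in an 80-bit x87 register
}

double densityB(double Size, double Range) {
  volatile double D = Size / Range; // forced to round to a 64-bit double
  return D;
}

int main() {
  // Under x87 excess precision this can print 0 even though both
  // functions compute the same expression; with SSE math, or once both
  // sides go through volatile slots, it always prints 1.
  std::printf("equal: %d\n", densityA(1.0, 3.0) == densityB(1.0, 3.0));
  return 0;
}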
@@ -5304,7 +5312,7 @@ private: typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; -} // end llvm namespace. +} // end anonymous namespace /// isAllocatableRegister - If the specified register is safe to allocate, /// i.e. it isn't a stack pointer or some other special register, return the @@ -5363,11 +5371,13 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, /// OpInfo describes the operand. /// Input and OutputRegs are the set of already allocated physical registers. /// -void SelectionDAGBuilder:: -GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs) { - LLVMContext &Context = FuncInfo.Fn->getContext(); +static void GetRegistersForValue(SelectionDAG &DAG, + const TargetLowering &TLI, + DebugLoc DL, + SDISelAsmOperandInfo &OpInfo, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs) { + LLVMContext &Context = *DAG.getContext(); // Compute whether this value requires an input register, an output register, // or both. @@ -5413,7 +5423,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // vector types). EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { @@ -5423,7 +5433,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // machine. RegVT = EVT::getIntegerVT(Context, OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } @@ -5694,7 +5704,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, + InputRegs); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5705,7 +5716,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, + InputRegs); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6181,7 +6193,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In - // that case, nothing will actualy look at the value. + // that case, nothing will actually look at the value. if (ReturnValues.empty()) return std::make_pair(SDValue(), Chain); @@ -6397,7 +6409,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SDB->setValue(I, Res); // If this argument is live outside of the entry block, insert a copy from - // whereever we got it to the vreg that other BB's will reference it as. + // wherever we got it to the vreg that other BB's will reference it as. 
SDB->CopyToExportRegsIfNeeded(I); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8f466d9..a689b76 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -23,7 +23,6 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> -#include <set> namespace llvm { @@ -60,7 +59,6 @@ class MDNode; class PHINode; class PtrToIntInst; class ReturnInst; -class SDISelAsmOperandInfo; class SDDbgValue; class SExtInst; class SelectInst; @@ -380,10 +378,6 @@ public: assert(N.getNode() == 0 && "Already set a value for this node!"); N = NewN; } - - void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs); void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 68ba966..fdf3767 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -421,10 +421,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -void -SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End, - bool &HadTailCall) { +void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. Terminators // are handled below. @@ -438,7 +437,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); - return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -489,13 +487,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; std::string BlockName; + int BlockNumber = -1; +#ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) +#endif + { + BlockNumber = FuncInfo->MBB->getNumber(); BlockName = MF->getFunction()->getNameStr() + ":" + FuncInfo->MBB->getBasicBlock()->getNameStr(); - - DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump()); + } + DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -505,7 +509,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
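The BlockNumber/BlockName plumbing threaded through CodeGenAndEmitDAG above follows a common pattern: the label is costly to build, release builds only need it when a -view-*-dags option is set, yet asserts builds want it unconditionally so every DEBUG() dump of the DAG can be tagged. A self-contained sketch of the same shape; the flag and the label builder are stand-ins, not the real APIs:

#include <cstdio>
#include <string>

static bool ViewRequested = false; // stands in for the -view-*-dags flags

static std::string buildLabel(int BlockNumber) {
  // Stands in for the function-name + basic-block-name concatenation,
  // too costly to run for every block in a release compiler.
  return "BB#" + std::to_string(BlockNumber);
}

int main() {
  std::string BlockName;
  int BlockNumber = -1;
#ifdef NDEBUG
  if (ViewRequested) // release build: only compute it when someone looks
#endif
  {                  // asserts build: always available for debug output
    BlockNumber = 42;
    BlockName = buildLabel(BlockNumber);
  }
  std::printf("Initial selection DAG: %s\n",
              BlockName.empty() ? "<unnamed>" : BlockName.c_str());
  return 0;
}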
@@ -518,7 +523,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (Changed) { if (ViewDAGCombineLT) @@ -531,8 +537,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"; - CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); } { @@ -556,8 +562,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"; - CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" + << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); @@ -567,7 +573,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(OptLevel); } - DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -577,7 +584,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -591,7 +599,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -632,7 +641,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } void SelectionDAGISel::DoInstructionSelection() { - DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(errs() << "===== Instruction selection begins: BB#" + << FuncInfo->MBB->getNumber() + << " '" << FuncInfo->MBB->getName() << "'\n"); PreprocessISelDAG(); @@ -735,16 +746,49 @@ void SelectionDAGISel::PrepareEHLandingPad() { - +/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified +/// load into the specified FoldInst. Note that we could have a sequence where +/// multiple LLVM IR instructions are folded into the same machineinstr. For +/// example we could have: +/// A: x = load i32 *P +/// B: y = icmp A, 42 +/// C: br y, ... +/// +/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and +/// any other folded instructions) because it is between A and C. +/// +/// If we succeed in folding the load into the operation, return true. +/// bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + const Instruction *FoldInst, FastISel *FastIS) { + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. 
+ + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + // Don't try to fold volatile loads. Target has to deal with alignment // constraints. if (LI->isVolatile()) return false; - // Figure out which vreg this is going into. + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. unsigned LoadReg = FastIS->getRegForValue(LI); - assert(LoadReg && "Load isn't already assigned a vreg? "); + if (LoadReg == 0) + return false; // Check to see what the uses of this vreg are. If it has no uses, or more // than one use (at the machine instr level) then we can't fold it. @@ -764,7 +808,7 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, "The only use of the vreg must be a use, we haven't emitted the def!"); MachineInstr *User = &*RI; - + // Set the insertion point properly. Folding the load can cause generation of // other random instructions (like sign extends) for addressing modes, make // sure they get inserted in a logical place before the new instruction. @@ -817,6 +861,17 @@ static void CheckLineNumbers(const MachineBasicBlock *MBB) { } #endif +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. + !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; @@ -843,15 +898,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (AllPredsVisited) { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I)); - } } else { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I)); - } } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -899,10 +952,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (!Inst->mayWriteToMemory() && - !isa<TerminatorInst>(Inst) && - !isa<DbgInfoIntrinsic>(Inst) && - !FuncInfo->isExportedInst(Inst)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) continue; // Bottom-up: reset the insert pos at the top, after any local-value @@ -911,16 +961,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. 
if (FastIS->SelectInstruction(Inst)) { - // If fast isel succeeded, check to see if there is a single-use - // non-volatile load right before the selected instruction, and see if - // the load is used by the instruction. If so, try to fold it. - const Instruction *BeforeInst = 0; - if (Inst != Begin) - BeforeInst = llvm::prior(llvm::prior(BI)); - if (BeforeInst && isa<LoadInst>(BeforeInst) && - BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) - --BI; // If we succeeded, don't re-select the load. + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. + const Instruction *BeforeInst = Inst; + while (BeforeInst != Begin) { + BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + // If we succeeded, don't re-select the load. + BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); continue; } @@ -974,11 +1028,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { else ++NumFastIselBlocks; - // Run SelectionDAG instruction selection on the remainder of the block - // not handled by FastISel. If FastISel is not run, this is the entire - // block. - bool HadTailCall; - SelectBasicBlock(Begin, BI, HadTailCall); + if (Begin != BI) { + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); + } FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); @@ -2392,6 +2448,18 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CurDAG->getRegister(RegNo, VT), (SDNode*)0)); continue; } + case OPC_EmitRegister2: { + // For targets w/ more than 256 register names, the register enum + // values are stored in two bytes in the matcher table (just like + // opcodes). + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RegNo |= MatcherTable[MatcherIndex++] << 8; + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + continue; + } case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 76eb945..cd1647b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -90,7 +90,8 @@ namespace llvm { /// If you want to override the dot attributes printed for a particular /// edge, override this method. 
template<typename EdgeIter> - static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { + static std::string getEdgeAttributes(const void *Node, EdgeIter EI, + const SelectionDAG *Graph) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); EVT VT = Op.getValueType(); if (VT == MVT::Glue) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 35b847c..15606af 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -93,6 +93,19 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::UREM_I32] = "__umodsi3"; Names[RTLIB::UREM_I64] = "__umoddi3"; Names[RTLIB::UREM_I128] = "__umodti3"; + + // These are generally not available. + Names[RTLIB::SDIVREM_I8] = 0; + Names[RTLIB::SDIVREM_I16] = 0; + Names[RTLIB::SDIVREM_I32] = 0; + Names[RTLIB::SDIVREM_I64] = 0; + Names[RTLIB::SDIVREM_I128] = 0; + Names[RTLIB::UDIVREM_I8] = 0; + Names[RTLIB::UDIVREM_I16] = 0; + Names[RTLIB::UDIVREM_I32] = 0; + Names[RTLIB::UDIVREM_I64] = 0; + Names[RTLIB::UDIVREM_I128] = 0; + Names[RTLIB::NEG_I32] = "__negsi2"; Names[RTLIB::NEG_I64] = "__negdi2"; Names[RTLIB::ADD_F32] = "__addsf3"; @@ -1665,6 +1678,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); if (!ShAmt) break; + SDValue Shift = In.getOperand(1); + if (TLO.LegalTypes()) { + uint64_t ShVal = ShAmt->getZExtValue(); + Shift = + TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + } + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); @@ -1678,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), NewTrunc, - In.getOperand(1))); + Shift)); } break; } @@ -1829,7 +1849,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; - LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1840,12 +1859,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } - if (isa<ConstantSDNode>(N0.getNode())) { - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + if (isa<ConstantSDNode>(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - } - + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1898,6 +1916,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. 
} + // (zext x) == C --> x == (trunc C) + if (DCI.isBeforeLegalize() && N0->hasOneUse() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + unsigned MinBits = N0.getValueSizeInBits(); + SDValue PreZExt; + if (N0->getOpcode() == ISD::ZERO_EXTEND) { + // ZExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreZExt = N0->getOperand(0); + } else if (N0->getOpcode() == ISD::AND) { + // DAGCombine turns costly ZExts into ANDs + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) + if ((C->getAPIntValue()+1).isPowerOf2()) { + MinBits = C->getAPIntValue().countTrailingOnes(); + PreZExt = N0->getOperand(0); + } + } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { + // ZEXTLOAD + if (LN0->getExtensionType() == ISD::ZEXTLOAD) { + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreZExt = N0; + } + } + + // Make sure we're not losing bits from the constant. + if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { + EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); + if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { + // Will get folded away. + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt); + SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); + return DAG.getSetCC(dl, VT, Trunc, C, Cond); + } + } + } + // If the LHS is '(and load, const)', the RHS is 0, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. if (N0.getOpcode() == ISD::AND && C1 == 0 && @@ -1936,7 +1990,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } if (bestWidth) { - EVT newVT = EVT::getIntegerVT(Context, bestWidth); + EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); if (newVT.isRound()) { EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); @@ -3174,26 +3228,39 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. - ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1)); - APInt::mu magics = N1C->getAPIntValue().magicu(); + const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + APInt::mu magics = N1C.magicu(); + + SDValue Q = N->getOperand(0); + + // If the divisor is even, we can avoid using the expensive fixup by shifting + // the dividend upfront. + if (magics.a != 0 && !N1C[0]) { + unsigned Shift = N1C.countTrailingZeros(); + Q = DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); + if (Created) + Created->push_back(Q.getNode()); + + // Get magic number for the shifted divisor. 
+ magics = N1C.lshr(Shift).magicu(Shift); + assert(magics.a == 0 && "Should use cheap fixup now"); + } // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type - SDValue Q; if (isOperationLegalOrCustom(ISD::MULHU, VT)) - Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0), - DAG.getConstant(magics.m, VT)); + Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) - Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), - N->getOperand(0), - DAG.getConstant(magics.m, VT)).getNode(), 1); + Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, + DAG.getConstant(magics.m, VT)).getNode(), 1); else return SDValue(); // No mulhu or equvialent if (Created) Created->push_back(Q.getNode()); if (magics.a == 0) { - assert(magics.s < N1C->getAPIntValue().getBitWidth() && + assert(magics.s < N1C.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 7b5bca4..160f38f 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -277,7 +277,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { // Initialize data flow sets. clearAnticAvailSets(); - // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. + // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. bool changed = true; unsigned iterations = 0; while (changed) { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 2843c1a..35b8e14 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -104,6 +104,18 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +void SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) { + /// Joined copies are not deleted immediately, but kept in JoinedCopies. + JoinedCopies.insert(CopyMI); + + /// Mark all register operands of CopyMI as <undef> so they won't affect dead + /// code elimination. + for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), + E = CopyMI->operands_end(); I != E; ++I) + if (I->isReg()) + I->setIsUndef(true); +} + /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a copy from B, @@ -196,15 +208,14 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, if (ValLR+1 != BLR) return false; // If a live interval is a physical register, conservatively check if any - // of its sub-registers is overlapping the live interval of the virtual - // register. If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) && - *tri_->getSubRegisters(IntB.reg)) { - for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { + // of its aliases is overlapping the live interval of the virtual register. + // If so, do not coalesce. 
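Returning to the BuildUDIV change a few hunks up: an even divisor such as 14 would otherwise hit the magics.a != 0 case and need the expensive add-fixup sequence, but shifting the dividend right by countTrailingZeros(14) = 1 leaves a value with a known-zero top bit, and the magic number for the shifted divisor 7 then needs no fixup. A self-contained sketch with hand-written constants (the usual 32-bit magic pair for 7; illustrative, not taken from APInt::magicu output):

#include <cassert>
#include <cstdint>

// N / 14 in the shape the change above produces: fold out the factor of
// two, then divide by 7 via multiply-high plus a final shift. With
// m = 0x92492493, floor(m * q / 2^34) == q / 7 for all q < 2^31.
static uint32_t divBy14(uint32_t N) {
  uint32_t Q = N >> 1;                             // srl by ctz(14)
  uint64_t Hi = ((uint64_t)Q * 0x92492493u) >> 32; // mulhu with magic for 7
  return (uint32_t)Hi >> 2;                        // post-multiply shift
}

int main() {
  for (uint32_t N = 0; N < 1000000u; ++N)
    assert(divBy14(N) == N / 14);
  assert(divBy14(0xFFFFFFFFu) == 0xFFFFFFFFu / 14); // top of the range
  return 0;
}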
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); + dbgs() << "\t\tInterfere with alias "; + li_->getInterval(*AS).print(dbgs(), tri_); }); return false; } @@ -471,7 +482,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); - JoinedCopies.insert(UseMI); + markAsJoined(UseMI); } // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition @@ -901,6 +912,58 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } +/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. +/// We need to be careful about coalescing a source physical register with a +/// virtual register. Once the coalescing is done, it cannot be broken and these +/// are not spillable! If the destination interval uses are far away, think +/// twice about coalescing them! +bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { + bool Allocatable = li_->isAllocatable(CP.getDstReg()); + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + /// Always join simple intervals that are defined by a single copy from a + /// reserved register. This doesn't increase register pressure, so it is + /// always beneficial. + if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) + return true; + + if (DisablePhysicalJoin) { + DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); + return false; + } + + // Only coalesce to allocatable physreg, we don't want to risk modifying + // reserved registers. + if (!Allocatable) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. + } + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } + + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial()) { + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold) { + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + return false; + } + } + return true; +} /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. @@ -973,27 +1036,25 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } - if (DisablePhysicalJoin && CP.isPhys()) { - DEBUG(dbgs() << "\tPhysical joins disabled.\n"); - return false; - } - - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)); + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) + << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << "\n"); // Enforce policies. 
if (CP.isPhys()) { - DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n"); - // Only coalesce to allocatable physreg. - if (!li_->isAllocatable(CP.getDstReg())) { - DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); - return false; // Not coalescable. + if (!shouldJoinPhys(CP)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + return false; } } else { - DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) - << " to " << CP.getNewRC()->getName() << "\n"); - // Avoid constraining virtual register regclass too much. if (CP.isCrossClass()) { + DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); if (DisableCrossClassJoin) { DEBUG(dbgs() << "\tCross-class joins disabled.\n"); return false; @@ -1002,8 +1063,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->getRegClass(CP.getSrcReg()), mri_->getRegClass(CP.getDstReg()), CP.getNewRC())) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " - << CP.getNewRC()->getName() << ".\n"); + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1015,45 +1075,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { CP.flip(); } - // We need to be careful about coalescing a source physical register with a - // virtual register. Once the coalescing is done, it cannot be broken and - // these are not spillable! If the destination interval uses are far away, - // think twice about coalescing them! - // FIXME: Why are we skipping this test for partial copies? - // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. - if (!CP.isPartial() && CP.isPhys()) { - LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. - if (li_->hasInterval(CP.getDstReg()) && - li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); - return false; - } - - const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(CP.getSrcReg()), - mri_->use_nodbg_end()) * Threshold < Length) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI)) - return true; - - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - // Okay, attempt to join these two intervals. On failure, this returns false. // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. 
The output "SrcInt" will not have @@ -1072,7 +1093,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!CP.isPartial()) { if (AdjustCopiesBackFrom(CP, CopyMI) || RemoveCopyByCommutingDef(CP, CopyMI)) { - JoinedCopies.insert(CopyMI); + markAsJoined(CopyMI); DEBUG(dbgs() << "\tTrivial!\n"); return true; } @@ -1092,7 +1113,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } // Remember to delete the copy instruction. - JoinedCopies.insert(CopyMI); + markAsJoined(CopyMI); UpdateRegDefsUses(CP); @@ -1568,9 +1589,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, if (UseMI->isIdentityCopy()) continue; SlotIndex Idx = li_->getInstructionIndex(UseMI); - // FIXME: Should this be Idx != UseIdx? SlotIndex() will return something - // that compares higher than any other interval. - if (Idx >= Start && Idx < End && Idx >= UseIdx) { + if (Idx >= Start && Idx < End && (!UseIdx.isValid() || Idx >= UseIdx)) { LastUse = &Use; UseIdx = Idx.getUseIndex(); } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 56703df..65cf542 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -68,16 +68,6 @@ namespace llvm { initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry()); } - struct InstrSlots { - enum { - LOAD = 0, - USE = 1, - DEF = 2, - STORE = 3, - NUM = 4 - }; - }; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual void releaseMemory(); @@ -148,6 +138,9 @@ namespace llvm { unsigned DstReg, unsigned DstSubIdx, MachineInstr *CopyMI); + /// shouldJoinPhys - Return true if a physreg copy should be joined. + bool shouldJoinPhys(CoalescerPair &CP); + /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. bool isWinToJoinCrossClass(unsigned SrcReg, @@ -186,6 +179,9 @@ namespace llvm { /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; + + /// markAsJoined - Remember that CopyMI has already been joined. + void markAsJoined(MachineInstr *CopyMI); }; } // End llvm namespace diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 13e1454..43904a7 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -442,25 +442,18 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { BasicBlock *DispatchBlock = BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - // Add a call to dispatch_setup at the start of the dispatch block. This is - // expanded to any target-specific setup that needs to be done. - Value *SetupArg = - CastInst::Create(Instruction::BitCast, FunctionContext, - Type::getInt8PtrTy(F.getContext()), "", - DispatchBlock); - CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock); - // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we go to a block that just does an unwind (which is the - // correct action for a standard call). - BasicBlock *UnwindBlock = - BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + // value. By default, we issue a trap statement. 
+ BasicBlock *TrapBlock = + BasicBlock::Create(F.getContext(), "trapbb", &F); + CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), + "", TrapBlock); + new UnreachableInst(F.getContext(), TrapBlock); Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, DispatchBlock); SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), DispatchBlock); // Split the entry block to insert the conditional branch for the setjmp. BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), @@ -524,6 +517,11 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "dispatch", EntryBB->getTerminator()); + + // Add a call to dispatch_setup after the setjmp call. This is expanded to any + // target-specific setup that needs to be done. + CallInst::Create(DispatchSetupFn, "", EntryBB->getTerminator()); + // check the return value of the setjmp. non-zero goes to dispatcher. Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), ICmpInst::ICMP_EQ, DispatchVal, Zero, @@ -564,7 +562,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Replace all unwinds with a branch to the unwind handler. // ??? Should this ever happen with sjlj exceptions? for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(UnwindBlock, Unwinds[i]); + BranchInst::Create(TrapBlock, Unwinds[i]); Unwinds[i]->eraseFromParent(); } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 6e3fa90..ca79caf 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -10,47 +10,20 @@ #define DEBUG_TYPE "slotindexes" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; - -// Yep - these are thread safe. See the header for details. -namespace { - - - class EmptyIndexListEntry : public IndexListEntry { - public: - EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {} - }; - - class TombstoneIndexListEntry : public IndexListEntry { - public: - TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {} - }; - - // The following statics are thread safe. They're read only, and you - // can't step from them to any other list entries. 
- ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey;
- ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey;
-}
-
char SlotIndexes::ID = 0;
INITIALIZE_PASS(SlotIndexes, "slotindexes",
"Slot index numbering", false, false)
-IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
- return &*IndexListEntryEmptyKey;
-}
-
-IndexListEntry* IndexListEntry::getTombstoneKeyEntry() {
- return &*IndexListEntryTombstoneKey;
-}
-
+STATISTIC(NumLocalRenum, "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesAll();
@@ -59,7 +32,7 @@ void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
void SlotIndexes::releaseMemory() {
mi2iMap.clear();
- mbb2IdxMap.clear();
+ MBBRanges.clear();
idx2MBBMap.clear();
clearList();
}
@@ -85,13 +58,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
"Index list non-empty at initial numbering?");
assert(idx2MBBMap.empty() &&
"Index -> MBB mapping non-empty at initial numbering?");
- assert(mbb2IdxMap.empty() &&
+ assert(MBBRanges.empty() &&
"MBB -> Index mapping non-empty at initial numbering?");
assert(mi2iMap.empty() &&
"MachineInstr -> Index mapping non-empty at initial numbering?");
functionSize = 0;
unsigned index = 0;
+ MBBRanges.resize(mf->getNumBlockIDs());
+ idx2MBBMap.reserve(mf->size());
push_back(createEntry(0, index));
@@ -103,8 +78,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
// Insert an index for the MBB start.
SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
- index += SlotIndex::NUM;
-
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
MachineInstr *mi = miItr;
@@ -112,32 +85,19 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
continue;
// Insert a store index for the instr.
- push_back(createEntry(mi, index));
+ push_back(createEntry(mi, index += SlotIndex::InstrDist));
// Save this base index in the maps.
- mi2iMap.insert(
- std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
++functionSize;
-
- unsigned Slots = mi->getDesc().getNumDefs();
- if (Slots == 0)
- Slots = 1;
-
- index += (Slots + 1) * SlotIndex::NUM;
}
- // We insert two blank instructions between basic blocks.
- // One to represent live-out registers and one to represent live-ins.
- push_back(createEntry(0, index));
- index += SlotIndex::NUM;
-
- push_back(createEntry(0, index));
-
- SlotIndex blockEndIndex(back(), SlotIndex::LOAD);
- mbb2IdxMap.insert(
- std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex)));
+ // We insert one blank instruction between basic blocks.
+ push_back(createEntry(0, index += SlotIndex::InstrDist));
+ MBBRanges[mbb->getNumber()].first = blockStartIndex;
+ MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD);
idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
}
@@ -151,38 +111,41 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
void SlotIndexes::renumberIndexes() {
- // Renumber updates the index of every element of the index list.
- // If all instrs in the function have been allocated an index (which has been
- // placed in the index list in the order of instruction iteration) then the
- // resulting numbering will match what would have been generated by the
- // pass during the initial numbering of the function if the new instructions
- // had been present.
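// A self-contained illustration of the numbering scheme introduced above:
// every entry now sits one fixed stride (SlotIndex::InstrDist) past its
// predecessor, so neighbors always have the same gap available for later
// insertions. The stride value 8 is an assumption for demonstration only.
#include <cstdio>
#include <vector>
inline void slotSpacingDemo() {
  const unsigned InstrDist = 8;            // assumed stride
  std::vector<unsigned> indexes;
  unsigned index = 0;
  indexes.push_back(index);                // block start entry
  for (int i = 0; i < 5; ++i)              // five "instructions"
    indexes.push_back(index += InstrDist);
  for (size_t i = 0; i != indexes.size(); ++i)
    std::printf("%u ", indexes[i]);        // prints: 0 8 16 24 32 40
  std::printf("\n");
}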
DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); + ++NumGlobalRenum; - functionSize = 0; unsigned index = 0; for (IndexListEntry *curEntry = front(); curEntry != getTail(); curEntry = curEntry->getNext()) { - curEntry->setIndex(index); - - if (curEntry->getInstr() == 0) { - // MBB start entry. Just step index by 1. - index += SlotIndex::NUM; - } - else { - ++functionSize; - unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs(); - if (Slots == 0) - Slots = 1; - - index += (Slots + 1) * SlotIndex::NUM; - } + index += SlotIndex::InstrDist; } } +// Renumber indexes locally after curEntry was inserted, but failed to get a new +// index. +void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) { + // Number indexes with half the default spacing so we can catch up quickly. + const unsigned Space = SlotIndex::InstrDist/2; + assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); + + IndexListEntry *start = curEntry->getPrev(); + unsigned index = start->getIndex(); + IndexListEntry *tail = getTail(); + do { + curEntry->setIndex(index += Space); + curEntry = curEntry->getNext(); + // If the next index is bigger, we have caught up. + } while (curEntry != tail && curEntry->getIndex() <= index); + + DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-' + << index << " ***\n"); + ++NumLocalRenum; +} + + void SlotIndexes::dump() const { for (const IndexListEntry *itr = front(); itr != getTail(); itr = itr->getNext()) { @@ -195,11 +158,9 @@ void SlotIndexes::dump() const { } } - for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin(); - itr != mbb2IdxMap.end(); ++itr) { - dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" - << itr->second.first << ", " << itr->second.second << "]\n"; - } + for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i) + dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' + << MBBRanges[i].second << ")\n"; } // Print a SlotIndex to a raw_ostream. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 9c0bf16..6949618 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -67,11 +67,11 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { /// because all weights are positive. /// struct SpillPlacement::Node { - /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle. + /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle. /// Ideally, these two numbers should be identical, but inaccuracies in the /// block frequency estimates means that we need to normalize ingoing and /// outgoing frequencies separately so they are commensurate. - float Frequency[2]; + float Scale[2]; /// Bias - Normalized contributions from non-transparent blocks. /// A bundle connected to a MustSpill block has a huge negative bias, @@ -107,7 +107,7 @@ struct SpillPlacement::Node { /// Node - Create a blank Node. Node() { - Frequency[0] = Frequency[1] = 0; + Scale[0] = Scale[1] = 0; } /// clear - Reset per-query data, but preserve frequencies that only depend on @@ -121,7 +121,7 @@ struct SpillPlacement::Node { /// out=0 for an ingoing link, and 1 for an outgoing link. void addLink(unsigned b, float w, bool out) { // Normalize w relative to all connected blocks from that direction. - w /= Frequency[out]; + w *= Scale[out]; // There can be multiple links to the same bundle, add them up. 
for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) @@ -134,9 +134,10 @@ struct SpillPlacement::Node { } /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. + /// Return the change to the total number of positive biases. void addBias(float w, bool out) { // Normalize w relative to all connected blocks from that direction. - w /= Frequency[out]; + w *= Scale[out]; Bias += w; } @@ -175,13 +176,22 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { nodes = new Node[bundles->getNumBundles()]; // Compute total ingoing and outgoing block frequencies for all bundles. + BlockFrequency.resize(mf.getNumBlockIDs()); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - float Freq = getBlockFrequency(I); + float Freq = LiveIntervals::getSpillWeight(true, false, + loops->getLoopDepth(I)); unsigned Num = I->getNumber(); - nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq; - nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq; + BlockFrequency[Num] = Freq; + nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq; + nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq; } + // Scales are reciprocal frequencies. + for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i) + for (unsigned d = 0; d != 2; ++d) + if (nodes[i].Scale[d] > 0) + nodes[i].Scale[d] = 1 / nodes[i].Scale[d]; + // We never change the function. return false; } @@ -200,31 +210,12 @@ void SpillPlacement::activate(unsigned n) { } -/// prepareNodes - Compute node biases and weights from a set of constraints. +/// addConstraints - Compute node biases and weights from a set of constraints. /// Set a bit in NodeMask for each active node. -void SpillPlacement:: -prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) { - for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(), +void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { + for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(), E = LiveBlocks.end(); I != E; ++I) { - MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number); - float Freq = getBlockFrequency(MBB); - - // Is this a transparent block? Link ingoing and outgoing bundles. - if (I->Entry == DontCare && I->Exit == DontCare) { - unsigned ib = bundles->getBundle(I->Number, 0); - unsigned ob = bundles->getBundle(I->Number, 1); - - // Ignore self-loops. - if (ib == ob) - continue; - activate(ib); - activate(ob); - nodes[ib].addLink(ob, Freq, 1); - nodes[ob].addLink(ib, Freq, 0); - continue; - } - - // This block is not transparent, but it can still add bias. + float Freq = getBlockFrequency(I->Number); const float Bias[] = { 0, // DontCare, 1, // PrefReg, @@ -248,10 +239,54 @@ prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) { } } +void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { + for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E; + ++I) { + unsigned Number = *I; + unsigned ib = bundles->getBundle(Number, 0); + unsigned ob = bundles->getBundle(Number, 1); + + // Ignore self-loops. 
+ if (ib == ob) + continue; + activate(ib); + activate(ob); + if (nodes[ib].Links.empty() && !nodes[ib].mustSpill()) + Linked.push_back(ib); + if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) + Linked.push_back(ob); + float Freq = getBlockFrequency(Number); + nodes[ib].addLink(ob, Freq, 1); + nodes[ob].addLink(ib, Freq, 0); + } +} + +bool SpillPlacement::scanActiveBundles() { + Linked.clear(); + RecentPositive.clear(); + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { + nodes[n].update(nodes); + // A node that must spill, or a node without any links is not going to + // change its value ever again, so exclude it from iterations. + if (nodes[n].mustSpill()) + continue; + if (!nodes[n].Links.empty()) + Linked.push_back(n); + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } + return !RecentPositive.empty(); +} + /// iterate - Repeatedly update the Hopfield nodes until stability or the /// maximum number of iterations is reached. /// @param Linked - Numbers of linked nodes that need updating. -void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { +void SpillPlacement::iterate() { + // First update the recently positive nodes. They have likely received new + // negative bias that will turn them off. + while (!RecentPositive.empty()) + nodes[RecentPositive.pop_back_val()].update(nodes); + if (Linked.empty()) return; @@ -267,10 +302,13 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { for (SmallVectorImpl<unsigned>::const_reverse_iterator I = llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; // Scan forwards, skipping the first node which was just updated. @@ -278,53 +316,37 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { for (SmallVectorImpl<unsigned>::const_iterator I = llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; } } -bool -SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks, - BitVector &RegBundles) { +void SpillPlacement::prepare(BitVector &RegBundles) { + Linked.clear(); + RecentPositive.clear(); // Reuse RegBundles as our ActiveNodes vector. ActiveNodes = &RegBundles; ActiveNodes->clear(); ActiveNodes->resize(bundles->getNumBundles()); +} - // Compute active nodes, links and biases. - prepareNodes(LiveBlocks); - - // Update all active nodes, and find the ones that are actually linked to - // something so their value may change when iterating. - SmallVector<unsigned, 8> Linked; - for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) { - nodes[n].update(nodes); - // A node that must spill, or a node without any links is not going to - // change its value ever again, so exclude it from iterations. - if (!nodes[n].Links.empty() && !nodes[n].mustSpill()) - Linked.push_back(n); - } - - // Iterate the network to convergence. - iterate(Linked); +bool +SpillPlacement::finish() { + assert(ActiveNodes && "Call prepare() first"); - // Write preferences back to RegBundles. + // Write preferences back to ActiveNodes. 
bool Perfect = true; - for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) if (!nodes[n].preferReg()) { - RegBundles.reset(n); + ActiveNodes->reset(n); Perfect = false; } + ActiveNodes = 0; return Perfect; } - -/// getBlockFrequency - Return our best estimate of the block frequency which is -/// the expected number of block executions per function invocation. -float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) { - // Use the unnormalized spill weight for real block frequencies. - return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB)); -} - diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index ef2d516..6952ad8 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -10,8 +10,8 @@ // This analysis computes the optimal spill code placement between basic blocks. // // The runOnMachineFunction() method only precomputes some profiling information -// about the CFG. The real work is done by placeSpills() which is called by the -// register allocator. +// about the CFG. The real work is done by prepare(), addConstraints(), and +// finish() which are called by the register allocator. // // Given a variable that is live across multiple basic blocks, and given // constraints on the basic blocks where the variable is live, determine which @@ -27,6 +27,8 @@ #ifndef LLVM_CODEGEN_SPILLPLACEMENT_H #define LLVM_CODEGEN_SPILLPLACEMENT_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -35,7 +37,6 @@ class BitVector; class EdgeBundles; class MachineBasicBlock; class MachineLoopInfo; -template <typename> class SmallVectorImpl; class SpillPlacement : public MachineFunctionPass { struct Node; @@ -44,10 +45,20 @@ class SpillPlacement : public MachineFunctionPass { const MachineLoopInfo *loops; Node *nodes; - // Nodes that are active in the current computation. Owned by the placeSpills + // Nodes that are active in the current computation. Owned by the prepare() // caller. BitVector *ActiveNodes; + // Nodes with active links. Populated by scanActiveBundles. + SmallVector<unsigned, 8> Linked; + + // Nodes that went positive during the last call to scanActiveBundles or + // iterate. + SmallVector<unsigned, 8> RecentPositive; + + // Block frequencies are computed once. Indexed by block number. + SmallVector<float, 4> BlockFrequency; + public: static char ID; // Pass identification, replacement for typeid. @@ -70,28 +81,53 @@ public: BorderConstraint Exit : 8; ///< Constraint on block exit. }; - /// placeSpills - Compute the optimal spill code placement given the - /// constraints. No MustSpill constraints will be violated, and the smallest - /// possible number of PrefX constraints will be violated, weighted by - /// expected execution frequencies. - /// @param LiveBlocks Constraints for blocks that have the variable live in or - /// live out. DontCare/DontCare means the variable is live - /// through the block. DontCare/X means the variable is live - /// out, but not live in. + /// prepare - Reset state and prepare for a new spill placement computation. /// @param RegBundles Bit vector to receive the edge bundles where the /// variable should be kept in a register. Each bit /// corresponds to an edge bundle, a set bit means the /// variable should be kept in a register through the /// bundle. A clear bit means the variable should be - /// spilled. 
+ /// spilled. This vector is retained.
+ void prepare(BitVector &RegBundles);
+
+ /// addConstraints - Add constraints and biases. This method may be called
+ /// more than once to accumulate constraints.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out.
+ void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+ /// addLinks - Add transparent blocks with the given numbers.
+ void addLinks(ArrayRef<unsigned> Links);
+
+ /// scanActiveBundles - Perform an initial scan of all bundles activated by
+ /// addConstraints and addLinks, updating their state. Add all the bundles
+ /// that now prefer a register to RecentPositive.
+ /// Prepare internal data structures for iterate.
+ /// Return true if there are any positive nodes.
+ bool scanActiveBundles();
+
+ /// iterate - Update the network iteratively until convergence, or new bundles
+ /// are found.
+ void iterate();
+
+ /// getRecentPositive - Return an array of bundles that became positive during
+ /// the previous call to scanActiveBundles or iterate.
+ ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+ /// finish - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// The selected bundles are returned in the bitvector passed to prepare().
/// @return True if a perfect solution was found, allowing the variable to be
/// in a register through all relevant bundles.
- bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
- BitVector &RegBundles);
+ bool finish();
/// getBlockFrequency - Return the estimated block execution frequency per
/// function invocation.
- float getBlockFrequency(const MachineBasicBlock*);
+ float getBlockFrequency(unsigned Number) const {
+ return BlockFrequency[Number];
+ }
private:
virtual bool runOnMachineFunction(MachineFunction&);
@@ -99,8 +135,6 @@ private:
virtual void releaseMemory();
void activate(unsigned);
- void prepareNodes(const SmallVectorImpl<BlockConstraint>&);
- void iterate(const SmallVectorImpl<unsigned>&);
};
} // end namespace llvm
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index fd38582..b6bbcd7 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -11,6 +11,7 @@
#include "Spiller.h"
#include "VirtRegMap.h"
+#include "LiveRangeEdit.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,7 +25,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <set>
using namespace llvm;
@@ -180,11 +180,9 @@ public:
VirtRegMap &vrm)
: SpillerBase(pass, mf, vrm) {}
- void spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &newIntervals,
- const SmallVectorImpl<LiveInterval*> &) {
+ void spill(LiveRangeEdit &LRE) {
// Ignore spillIs - we don't use it.
- trivialSpillEverywhere(li, newIntervals);
+ trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs());
}
};
@@ -210,22 +208,22 @@ public:
vrm(&vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
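// A minimal stand-in showing the intended calling protocol of the
// SpillPlacement interface declared above. The class body is a stub invented
// for illustration; only the call order reflects the new API.
#include <vector>
struct SpillPlacementProtocolStub {
  std::vector<bool> *Active = nullptr;       // retained output vector
  void prepare(std::vector<bool> &B) { Active = &B; }
  void addConstraints() {}                   // may be called repeatedly
  void addLinks() {}                         // transparent blocks
  bool scanActiveBundles() { return false; } // any positive nodes?
  void iterate() {}                          // Hopfield updates
  bool finish() { Active = nullptr; return true; }
};
inline bool spillPlacementDemo() {
  SpillPlacementProtocolStub SP;
  std::vector<bool> RegBundles;
  SP.prepare(RegBundles);          // 1. reset, retain the output vector
  SP.addConstraints();             // 2. biases for constrained blocks
  SP.addLinks();                   // 3. links for live-through blocks
  if (SP.scanActiveBundles())      // 4. initial scan of active nodes
    SP.iterate();                  // 5. update until convergence
  return SP.finish();              // 6. preferences land in RegBundles
}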
- void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &spillIs) { + void spill(LiveRangeEdit &LRE) { std::vector<LiveInterval*> added = - lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); - newIntervals.insert(newIntervals.end(), added.begin(), added.end()); + lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(), + loopInfo, *vrm); + LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(), + added.begin(), added.end()); // Update LiveStacks. - int SS = vrm->getStackSlot(li->reg); + int SS = vrm->getStackSlot(LRE.getReg()); if (SS == VirtRegMap::NO_STACK_SLOT) return; - const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg); + const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg()); LiveInterval &SI = lss->getOrCreateInterval(SS, RC); if (!SI.hasAtLeastOneValue()) SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); - SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0)); + SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0)); } }; diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index f017583..41f1727 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -12,11 +12,9 @@ namespace llvm { - class LiveInterval; + class LiveRangeEdit; class MachineFunction; class MachineFunctionPass; - class SlotIndex; - template <typename T> class SmallVectorImpl; class VirtRegMap; /// Spiller interface. @@ -27,16 +25,8 @@ namespace llvm { public: virtual ~Spiller() = 0; - /// spill - Spill the given live interval. The method used will depend on - /// the Spiller implementation selected. - /// - /// @param li The live interval to be spilled. - /// @param spillIs A list of intervals that are about to be spilled, - /// and so cannot be used for remat etc. - /// @param newIntervals The newly created intervals will be appended here. - virtual void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &spillIs) = 0; + /// spill - Spill the LRE.getParent() live interval. 
+ virtual void spill(LiveRangeEdit &LRE) = 0;
};
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index fd5d50b..ac9d72b 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -16,12 +16,11 @@
#include "SplitKit.h"
#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -29,9 +28,8 @@
using namespace llvm;
-static cl::opt<bool>
-AllowSplit("spiller-splits-edges",
- cl::desc("Allow critical edge splitting during spilling"));
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple, "Number of splits that were simple");
//===----------------------------------------------------------------------===//
// Split Analysis
//===----------------------------------------------------------------------===//
@@ -45,49 +43,105 @@ SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
LIS(lis),
Loops(mli),
TII(*MF.getTarget().getInstrInfo()),
- CurLI(0) {}
+ CurLI(0),
+ LastSplitPoint(MF.getNumBlockIDs()) {}
void SplitAnalysis::clear() {
UseSlots.clear();
- UsingInstrs.clear();
- UsingBlocks.clear();
- LiveBlocks.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
CurLI = 0;
}
-bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
- MachineBasicBlock *T, *F;
- SmallVector<MachineOperand, 4> Cond;
- return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
+ const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
+ const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
+ std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+
+ // Compute split points on the first call. The pair is independent of the
+ // current live interval.
+ if (!LSP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
+ if (FirstTerm == MBB->end())
+ LSP.first = LIS.getMBBEndIdx(MBB);
+ else
+ LSP.first = LIS.getInstructionIndex(FirstTerm);
+
+ // If there is a landing pad successor, also find the call instruction.
+ if (!LPad)
+ return LSP.first;
+ // There may not be a call instruction (?) in which case we ignore LPad.
+ LSP.second = LSP.first;
+ for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin();
+ I != E; --I)
+ if (I->getDesc().isCall()) {
+ LSP.second = LIS.getInstructionIndex(I);
+ break;
+ }
+ }
+
+ // If CurLI is live into a landing pad successor, move the last split point
+ // back to the call that may throw.
+ if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.second;
+ else
+ return LSP.first;
}
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
void SplitAnalysis::analyzeUses() {
+ assert(UseSlots.empty() && "Call clear first");
+
+ // First get all the defs from the interval values. This provides the correct
+ // slots for early clobbers.
+ for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(),
+ E = CurLI->vni_end(); I != E; ++I)
+ if (!(*I)->isPHIDef() && !(*I)->isUnused())
+ UseSlots.push_back((*I)->def);
+
+ // Get use slots from the use-def chain.
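// A standalone sketch of the last-split-point rule implemented in
// computeLastSplitPoint() above: the default answer is the first terminator,
// but when the block has a landing-pad successor that the current interval is
// live into, the split point moves back to the last call, the instruction
// that may throw. The InstrSketch record and its flags are invented for
// illustration.
#include <vector>
struct InstrSketch { bool IsTerm, IsCall; };
inline int lastSplitPointSketch(const std::vector<InstrSketch> &Block,
                                bool LiveIntoLandingPad) {
  int FirstTerm = (int)Block.size();       // "block end" if no terminator
  for (int i = 0, e = (int)Block.size(); i != e; ++i)
    if (Block[i].IsTerm) { FirstTerm = i; break; }
  if (!LiveIntoLandingPad)
    return FirstTerm;
  for (int i = FirstTerm; i > 0; --i)      // search backwards for the call
    if (Block[i - 1].IsCall)
      return i - 1;
  return FirstTerm;                        // no call: ignore the landing pad
}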
const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg), - E = MRI.reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - if (MO.isUse() && MO.isUndef()) - continue; - MachineInstr *MI = MO.getParent(); - if (MI->isDebugValue() || !UsingInstrs.insert(MI)) - continue; - UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex()); - MachineBasicBlock *MBB = MI->getParent(); - UsingBlocks[MBB]++; - } + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; + ++I) + if (!I.getOperand().isUndef()) + UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex()); + array_pod_sort(UseSlots.begin(), UseSlots.end()); - calcLiveBlockInfo(); - DEBUG(dbgs() << " counted " - << UsingInstrs.size() << " instrs, " - << UsingBlocks.size() << " blocks.\n"); + + // Remove duplicates, keeping the smaller slot for each instruction. + // That is what we want for early clobbers. + UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(), + SlotIndex::isSameInstr), + UseSlots.end()); + + // Compute per-live block info. + if (!calcLiveBlockInfo()) { + // FIXME: calcLiveBlockInfo found inconsistencies in the live range. + // I am looking at you, SimpleRegisterCoalescing! + DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); + const_cast<LiveIntervals&>(LIS) + .shrinkToUses(const_cast<LiveInterval*>(CurLI)); + UseBlocks.clear(); + ThroughBlocks.clear(); + bool fixed = calcLiveBlockInfo(); + (void)fixed; + assert(fixed && "Couldn't fix broken live interval"); + } + + DEBUG(dbgs() << "Analyze counted " + << UseSlots.size() << " instrs in " + << UseBlocks.size() << " blocks, through " + << NumThroughBlocks << " blocks.\n"); } /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks /// where CurLI is live. -void SplitAnalysis::calcLiveBlockInfo() { +bool SplitAnalysis::calcLiveBlockInfo() { + ThroughBlocks.resize(MF.getNumBlockIDs()); + NumThroughBlocks = 0; if (CurLI->empty()) - return; + return true; LiveInterval::const_iterator LVI = CurLI->begin(); LiveInterval::const_iterator LVE = CurLI->end(); @@ -104,24 +158,14 @@ void SplitAnalysis::calcLiveBlockInfo() { SlotIndex Start, Stop; tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - // The last split point is the latest possible insertion point that dominates - // all successor blocks. If interference reaches LastSplitPoint, it is not - // possible to insert a split or reload that makes CurLI live in the - // outgoing bundle. - MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB); - if (LSP == BI.MBB->end()) - BI.LastSplitPoint = Stop; - else - BI.LastSplitPoint = LIS.getInstructionIndex(LSP); - // LVI is the first live segment overlapping MBB. BI.LiveIn = LVI->start <= Start; if (!BI.LiveIn) BI.Def = LVI->start; // Find the first and last uses in the block. - BI.Uses = hasUses(MFI); - if (BI.Uses && UseI != UseE) { + bool Uses = UseI != UseE && *UseI < Stop; + if (Uses) { BI.FirstUse = *UseI; assert(BI.FirstUse >= Start); do ++UseI; @@ -149,7 +193,16 @@ void SplitAnalysis::calcLiveBlockInfo() { // Don't set LiveThrough when the block has a gap. BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut; - LiveBlocks.push_back(BI); + if (Uses) + UseBlocks.push_back(BI); + else { + ++NumThroughBlocks; + ThroughBlocks.set(BI.MBB->getNumber()); + } + // FIXME: This should never happen. The live range stops or starts without a + // corresponding use. 
An earlier pass did something wrong. + if (!BI.LiveThrough && !Uses) + return false; // LVI is now at LVE or LVI->end >= Stop. if (LVI == LVE) @@ -165,6 +218,30 @@ void SplitAnalysis::calcLiveBlockInfo() { else MFI = LIS.getMBBFromIndex(LVI->start); } + return true; +} + +unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { + if (cli->empty()) + return 0; + LiveInterval *li = const_cast<LiveInterval*>(cli); + LiveInterval::iterator LVI = li->begin(); + LiveInterval::iterator LVE = li->end(); + unsigned Count = 0; + + // Loop over basic blocks where li is live. + MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); + SlotIndex Stop = LIS.getMBBEndIdx(MFI); + for (;;) { + ++Count; + LVI = li->advanceTo(LVI, Stop); + if (LVI == LVE) + return Count; + do { + ++MFI; + Stop = LIS.getMBBEndIdx(MFI); + } while (Stop <= LVI->start); + } } bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { @@ -181,15 +258,6 @@ bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { return I != Orig.begin() && (--I)->end == Idx; } -void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const { - for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) { - unsigned count = UsingBlocks.lookup(*I); - OS << " BB#" << (*I)->getNumber(); - if (count) - OS << '(' << count << ')'; - } -} - void SplitAnalysis::analyze(const LiveInterval *li) { clear(); CurLI = li; @@ -198,171 +266,242 @@ void SplitAnalysis::analyze(const LiveInterval *li) { //===----------------------------------------------------------------------===// -// LiveIntervalMap +// Split Editor //===----------------------------------------------------------------------===// -// Work around the fact that the std::pair constructors are broken for pointer -// pairs in some implementations. makeVV(x, 0) works. -static inline std::pair<const VNInfo*, VNInfo*> -makeVV(const VNInfo *a, VNInfo *b) { - return std::make_pair(a, b); -} +/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. +SplitEditor::SplitEditor(SplitAnalysis &sa, + LiveIntervals &lis, + VirtRegMap &vrm, + MachineDominatorTree &mdt) + : SA(sa), LIS(lis), VRM(vrm), + MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), + TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + Edit(0), + OpenIdx(0), + RegAssign(Allocator) +{} -void LiveIntervalMap::reset(LiveInterval *li) { - LI = li; +void SplitEditor::reset(LiveRangeEdit &lre) { + Edit = &lre; + OpenIdx = 0; + RegAssign.clear(); Values.clear(); - LiveOutCache.clear(); + + // We don't need to clear LiveOutCache, only LiveOutSeen entries are read. + LiveOutSeen.clear(); + + // We don't need an AliasAnalysis since we will only be performing + // cheap-as-a-copy remats anyway. + Edit->anyRematerializable(LIS, TII, 0); } -bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const { - ValueMap::const_iterator i = Values.find(ParentVNI); - return i != Values.end() && i->second == 0; +void SplitEditor::dump() const { + if (RegAssign.empty()) { + dbgs() << " empty\n"; + return; + } + + for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) + dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); + dbgs() << '\n'; } -// defValue - Introduce a LI def for ParentVNI that could be later than -// ParentVNI->def. 
-VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) { - assert(LI && "call reset first"); +VNInfo *SplitEditor::defValue(unsigned RegIdx, + const VNInfo *ParentVNI, + SlotIndex Idx) { assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); - assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); + LiveInterval *LI = Edit->get(RegIdx); // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); - // Preserve the PHIDef bit. - if (ParentVNI->isPHIDef() && Idx == ParentVNI->def) - VNI->setIsPHIDef(true); - // Use insert for lookup, so we can add missing values with a second lookup. - std::pair<ValueMap::iterator,bool> InsP = - Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0)); + std::pair<ValueMap::iterator, bool> InsP = + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI)); + + // This was the first time (RegIdx, ParentVNI) was mapped. + // Keep it as a simple def without any liveness. + if (InsP.second) + return VNI; - // This is now a complex def. Mark with a NULL in valueMap. - if (!InsP.second) + // If the previous value was a simple mapping, add liveness for it now. + if (VNInfo *OldVNI = InsP.first->second) { + SlotIndex Def = OldVNI->def; + LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + // No longer a simple mapping. InsP.first->second = 0; + } + + // This is a complex mapping, add liveness for VNI + SlotIndex Def = VNI->def; + LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); return VNI; } - -// mapValue - Find the mapped value for ParentVNI at Idx. -// Potentially create phi-def values. -VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx, - bool *simple) { - assert(LI && "call reset first"); +void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) { assert(ParentVNI && "Mapping NULL value"); - assert(Idx.isValid() && "Invalid SlotIndex"); - assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)]; - // Use insert for lookup, so we can add missing values with a second lookup. - std::pair<ValueMap::iterator,bool> InsP = - Values.insert(makeVV(ParentVNI, 0)); - - // This was an unknown value. Create a simple mapping. - if (InsP.second) { - if (simple) *simple = true; - return InsP.first->second = LI->createValueCopy(ParentVNI, - LIS.getVNInfoAllocator()); - } + // ParentVNI was either unmapped or already complex mapped. Either way. + if (!VNI) + return; - // This was a simple mapped value. - if (InsP.first->second) { - if (simple) *simple = true; - return InsP.first->second; - } + // This was previously a single mapping. Make sure the old def is represented + // by a trivial live range. + SlotIndex Def = VNI->def; + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + VNI = 0; +} - // This is a complex mapped value. There may be multiple defs, and we may need - // to create phi-defs. - if (simple) *simple = false; +// extendRange - Extend the live range to reach Idx. +// Potentially create phi-def values. +void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { + assert(Idx.isValid() && "Invalid SlotIndex"); MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx); assert(IdxMBB && "No MBB at Idx"); + LiveInterval *LI = Edit->get(RegIdx); // Is there a def in the same MBB we can extend? 
- if (VNInfo *VNI = extendTo(IdxMBB, Idx)) - return VNI; + if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx)) + return; // Now for the fun part. We know that ParentVNI potentially has multiple defs, // and we may need to create even more phi-defs to preserve VNInfo SSA form. // Perform a search for all predecessor blocks where we know the dominating - // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. - DEBUG(dbgs() << "\n Reaching defs for BB#" << IdxMBB->getNumber() - << " at " << Idx << " in " << *LI << '\n'); + // VNInfo. + VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot()); + + // When there were multiple different values, we may need new PHIs. + if (!VNI) + return updateSSA(); + + // Poor man's SSA update for the single-value case. + LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]); + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->DomNode->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { + LiveOutCache[MBB] = LOP; + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + } + } +} + +/// findReachingDefs - Search the CFG for known live-out values. +/// Add required live-in blocks to LiveInBlocks. +VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill) { + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } // Blocks where LI should be live-in. - SmallVector<MachineDomTreeNode*, 16> LiveIn; - LiveIn.push_back(MDT[IdxMBB]); + SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); + + // Remember if we have seen more than one value. + bool UniqueVNI = true; + VNInfo *TheVNI = 0; // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. - for (unsigned i = 0; i != LiveIn.size(); ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (unsigned i = 0; i != WorkList.size(); ++i) { + MachineBasicBlock *MBB = WorkList[i]; + assert(!MBB->pred_empty() && "Value live-in to entry block?"); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; + LiveOutPair &LOP = LiveOutCache[Pred]; + // Is this a known live-out block? - std::pair<LiveOutMap::iterator,bool> LOIP = - LiveOutCache.insert(std::make_pair(Pred, LiveOutPair())); - // Yes, we have been here before. - if (!LOIP.second) { - DEBUG(if (VNInfo *VNI = LOIP.first->second.first) - dbgs() << " known valno #" << VNI->id - << " at BB#" << Pred->getNumber() << '\n'); + if (LiveOutSeen.test(Pred->getNumber())) { + if (VNInfo *VNI = LOP.first) { + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; + } continue; } + // First time. LOP is garbage and must be cleared below. + LiveOutSeen.set(Pred->getNumber()); + // Does Pred provide a live-out value? 
- SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot(); - if (VNInfo *VNI = extendTo(Pred, Last)) { - MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def); - DEBUG(dbgs() << " found valno #" << VNI->id - << " from BB#" << DefMBB->getNumber() - << " at BB#" << Pred->getNumber() << '\n'); - LiveOutPair &LOP = LOIP.first->second; - LOP.first = VNI; - LOP.second = MDT[DefMBB]; + SlotIndex Start, Last; + tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred); + Last = Last.getPrevSlot(); + VNInfo *VNI = LI->extendInBlock(Start, Last); + LOP.first = VNI; + if (VNI) { + LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)]; + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; continue; } + LOP.second = 0; + // No, we need a live-in value for Pred as well - if (Pred != IdxMBB) - LiveIn.push_back(MDT[Pred]); + if (Pred != KillMBB) + WorkList.push_back(Pred); + else + // Loopback to KillMBB, so value is really live through. + Kill = SlotIndex(); } } - // We may need to add phi-def values to preserve the SSA form. + // Transfer WorkList to LiveInBlocks in reverse order. + // This ordering works best with updateSSA(). + LiveInBlocks.clear(); + LiveInBlocks.reserve(WorkList.size()); + while(!WorkList.empty()) + LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]); + + // The kill block may not be live-through. + assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB); + LiveInBlocks.back().Kill = Kill; + + return UniqueVNI ? TheVNI : 0; +} + +void SplitEditor::updateSSA() { // This is essentially the same iterative algorithm that SSAUpdater uses, // except we already have a dominator tree, so we don't have to recompute it. - VNInfo *IdxVNI = 0; unsigned Changes; do { Changes = 0; - DEBUG(dbgs() << " Iterating over " << LiveIn.size() << " blocks.\n"); - // Propagate live-out values down the dominator tree, inserting phi-defs when - // necessary. Since LiveIn was created by a BFS, going backwards makes it more - // likely for us to visit immediate dominators before their children. - for (unsigned i = LiveIn.size(); i; --i) { - MachineDomTreeNode *Node = LiveIn[i-1]; + // Propagate live-out values down the dominator tree, inserting phi-defs + // when necessary. + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineDomTreeNode *Node = I->DomNode; + // Skip block if the live-in value has already been determined. + if (!Node) + continue; MachineBasicBlock *MBB = Node->getBlock(); MachineDomTreeNode *IDom = Node->getIDom(); LiveOutPair IDomValue; + // We need a live-in value to a block with no immediate dominator? // This is probably an unreachable block that has survived somehow. - bool needPHI = !IDom; + bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber()); - // Get the IDom live-out value. - if (!needPHI) { - LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock()); - if (I != LiveOutCache.end()) - IDomValue = I->second; - else - // If IDom is outside our set of live-out blocks, there must be new - // defs, and we need a phi-def here. - needPHI = true; - } - - // IDom dominates all of our predecessors, but it may not be the immediate - // dominator. Check if any of them have live-out values that are properly - // dominated by IDom. If so, we need a phi-def here. + // IDom dominates all of our predecessors, but it may not be their + // immediate dominator. Check if any of them have live-out values that are + // properly dominated by IDom. If so, we need a phi-def here. 
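// A standalone sketch of the findReachingDefs() search above: a breadth-first
// walk over predecessors that caches one live-out value per block and reports
// a unique reaching value when there is one. The graph encoding (predecessor
// lists plus an optional defined value per block) is invented for
// illustration; -1 stands for "no single value, PHIs are needed".
#include <cstddef>
#include <vector>
inline int findReachingDefsSketch(const std::vector<std::vector<int> > &Preds,
                                  const std::vector<int> &LocalDef,
                                  int KillBB) {
  const int Unseen = -2, LiveIn = -1;
  std::vector<int> Cache(Preds.size(), Unseen); // doubles as the visited set
  std::vector<int> Work(1, KillBB);
  int TheVal = -1;
  bool Unique = true;
  for (size_t i = 0; i != Work.size(); ++i)     // BFS over predecessors
    for (size_t p = 0; p != Preds[Work[i]].size(); ++p) {
      int Pred = Preds[Work[i]][p];
      if (Cache[Pred] == Unseen) {
        if (LocalDef[Pred] >= 0)
          Cache[Pred] = LocalDef[Pred];         // Pred provides a live-out
        else {
          Cache[Pred] = LiveIn;                 // Pred needs a live-in too
          if (Pred != KillBB)
            Work.push_back(Pred);
          continue;
        }
      }
      if (Cache[Pred] >= 0) {                   // known live-out value
        if (TheVal >= 0 && TheVal != Cache[Pred])
          Unique = false;
        TheVal = Cache[Pred];
      }
    }
  return Unique ? TheVal : -1;
}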
if (!needPHI) { + IDomValue = LiveOutCache[IDom->getBlock()]; for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { LiveOutPair Value = LiveOutCache[*PI]; @@ -378,215 +517,57 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx, } } + // The value may be live-through even if Kill is set, as can happen when + // we are called from extendRange. In that case LiveOutSeen is true, and + // LiveOutCache indicates a foreign or missing value. + LiveOutPair &LOP = LiveOutCache[MBB]; + // Create a phi-def if required. if (needPHI) { ++Changes; SlotIndex Start = LIS.getMBBStartIdx(MBB); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); VNI->setIsPHIDef(true); - DEBUG(dbgs() << " - BB#" << MBB->getNumber() - << " phi-def #" << VNI->id << " at " << Start << '\n'); - // We no longer need LI to be live-in. - LiveIn.erase(LiveIn.begin()+(i-1)); - // Blocks in LiveIn are either IdxMBB, or have a value live-through. - if (MBB == IdxMBB) - IdxVNI = VNI; - // Check if we need to update live-out info. - LiveOutMap::iterator I = LiveOutCache.find(MBB); - if (I == LiveOutCache.end() || I->second.second == Node) { - // We already have a live-out defined in MBB, so this must be IdxMBB. - assert(MBB == IdxMBB && "Adding phi-def to known live-out"); - LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI)); - } else { - // This phi-def is also live-out, so color the whole block. + I->Value = VNI; + // This block is done, we know the final value. + I->DomNode = 0; + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - I->second = LiveOutPair(VNI, Node); + LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { - // No phi-def here. Remember incoming value for IdxMBB. - if (MBB == IdxMBB) - IdxVNI = IDomValue.first; + // No phi-def here. Remember incoming value. + I->Value = IDomValue.first; + if (I->Kill.isValid()) + continue; // Propagate IDomValue if needed: // MBB is live-out and doesn't define its own value. - LiveOutMap::iterator I = LiveOutCache.find(MBB); - if (I != LiveOutCache.end() && I->second.second != Node && - I->second.first != IDomValue.first) { + if (LOP.second != Node && LOP.first != IDomValue.first) { ++Changes; - I->second = IDomValue; - DEBUG(dbgs() << " - BB#" << MBB->getNumber() - << " idom valno #" << IDomValue.first->id - << " from BB#" << IDom->getBlock()->getNumber() << '\n'); + LOP = IDomValue; } } } - DEBUG(dbgs() << " - made " << Changes << " changes.\n"); } while (Changes); - assert(IdxVNI && "Didn't find value for Idx"); - -#ifndef NDEBUG - // Check the LiveOutCache invariants. - for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); - I != E; ++I) { - assert(I->first && "Null MBB entry in cache"); - assert(I->second.first && "Null VNInfo in cache"); - assert(I->second.second && "Null DomTreeNode in cache"); - if (I->second.second->getBlock() == I->first) - continue; - for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), - PE = I->first->pred_end(); PI != PE; ++PI) - assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant"); - } -#endif - - // Since we went through the trouble of a full BFS visiting all reaching defs, - // the values in LiveIn are now accurate. No more phi-defs are needed + // The values in LiveInBlocks are now accurate. 
No more phi-defs are needed // for these blocks, so we can color the live ranges. - // This makes the next mapValue call much faster. - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + if (!I->DomNode) + continue; + assert(I->Value && "No live-in value found"); + MachineBasicBlock *MBB = I->DomNode->getBlock(); SlotIndex Start = LIS.getMBBStartIdx(MBB); - VNInfo *VNI = LiveOutCache.lookup(MBB).first; - - // Anything in LiveIn other than IdxMBB is live-through. - // In IdxMBB, we should stop at Idx unless the same value is live-out. - if (MBB == IdxMBB && IdxVNI != VNI) - LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI)); - else - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); + LI->addRange(LiveRange(Start, I->Kill.isValid() ? + I->Kill : LIS.getMBBEndIdx(MBB), I->Value)); } - - return IdxVNI; -} - -#ifndef NDEBUG -void LiveIntervalMap::dumpCache() { - for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); - I != E; ++I) { - assert(I->first && "Null MBB entry in cache"); - assert(I->second.first && "Null VNInfo in cache"); - assert(I->second.second && "Null DomTreeNode in cache"); - dbgs() << " cache: BB#" << I->first->getNumber() - << " has valno #" << I->second.first->id << " from BB#" - << I->second.second->getBlock()->getNumber() << ", preds"; - for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), - PE = I->first->pred_end(); PI != PE; ++PI) - dbgs() << " BB#" << (*PI)->getNumber(); - dbgs() << '\n'; - } - dbgs() << " cache: " << LiveOutCache.size() << " entries.\n"; -} -#endif - -// extendTo - Find the last LI value defined in MBB at or before Idx. The -// ParentLI is assumed to be live at Idx. Extend the live range to Idx. -// Return the found VNInfo, or NULL. -VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) { - assert(LI && "call reset first"); - LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx); - if (I == LI->begin()) - return 0; - --I; - if (I->end <= LIS.getMBBStartIdx(MBB)) - return 0; - if (I->end <= Idx) - I->end = Idx.getNextSlot(); - return I->valno; -} - -// addSimpleRange - Add a simple range from ParentLI to LI. -// ParentVNI must be live in the [Start;End) interval. -void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End, - const VNInfo *ParentVNI) { - assert(LI && "call reset first"); - bool simple; - VNInfo *VNI = mapValue(ParentVNI, Start, &simple); - // A simple mapping is easy. - if (simple) { - LI->addRange(LiveRange(Start, End, VNI)); - return; - } - - // ParentVNI is a complex value. We must map per MBB. - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); - MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot()); - - if (MBB == MBBE) { - LI->addRange(LiveRange(Start, End, VNI)); - return; - } - - // First block. - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - - // Run sequence of full blocks. - for (++MBB; MBB != MBBE; ++MBB) { - Start = LIS.getMBBStartIdx(MBB); - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), - mapValue(ParentVNI, Start))); - } - - // Final block. 
- Start = LIS.getMBBStartIdx(MBB); - if (Start != End) - LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); -} - -/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. -/// All needed values whose def is not inside [Start;End) must be defined -/// beforehand so mapValue will work. -void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { - assert(LI && "call reset first"); - LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end(); - LiveInterval::const_iterator I = std::lower_bound(B, E, Start); - - // Check if --I begins before Start and overlaps. - if (I != B) { - --I; - if (I->end > Start) - addSimpleRange(Start, std::min(End, I->end), I->valno); - ++I; - } - - // The remaining ranges begin after Start. - for (;I != E && I->start < End; ++I) - addSimpleRange(I->start, std::min(End, I->end), I->valno); -} - - -//===----------------------------------------------------------------------===// -// Split Editor -//===----------------------------------------------------------------------===// - -/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. -SplitEditor::SplitEditor(SplitAnalysis &sa, - LiveIntervals &lis, - VirtRegMap &vrm, - MachineDominatorTree &mdt, - LiveRangeEdit &edit) - : SA(sa), LIS(lis), VRM(vrm), - MRI(vrm.getMachineFunction().getRegInfo()), - MDT(mdt), - TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), - TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), - Edit(edit), - OpenIdx(0), - RegAssign(Allocator) -{ - // We don't need an AliasAnalysis since we will only be performing - // cheap-as-a-copy remats anyway. - Edit.anyRematerializable(LIS, TII, 0); -} - -void SplitEditor::dump() const { - if (RegAssign.empty()) { - dbgs() << " empty\n"; - return; - } - - for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) - dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); - dbgs() << '\n'; } VNInfo *SplitEditor::defFromParent(unsigned RegIdx, @@ -596,51 +577,53 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, MachineBasicBlock::iterator I) { MachineInstr *CopyMI = 0; SlotIndex Def; - LiveInterval *LI = Edit.get(RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // We may be trying to avoid interference that ends at a deleted instruction, + // so always begin RegIdx 0 early and all others late. + bool Late = RegIdx != 0; // Attempt cheap-as-a-copy rematerialization. LiveRangeEdit::Remat RM(ParentVNI); - if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI); + if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) - .addReg(Edit.getReg()); - Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex(); + .addReg(Edit->getReg()); + Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) + .getDefIndex(); } // Define the value in Reg. - VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def); + VNInfo *VNI = defValue(RegIdx, ParentVNI, Def); VNI->setCopy(CopyMI); - - // Add minimal liveness for the new value. - Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); return VNI; } /// Create a new virtual register and live interval. 
-void SplitEditor::openIntv() { - assert(!OpenIdx && "Previous LI not closed before openIntv"); - +unsigned SplitEditor::openIntv() { // Create the complement as index 0. - if (Edit.empty()) { - Edit.create(MRI, LIS, VRM); - LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); - LIMappers.back().reset(Edit.get(0)); - } + if (Edit->empty()) + Edit->create(LIS, VRM); // Create the open interval. - OpenIdx = Edit.size(); - Edit.create(MRI, LIS, VRM); - LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); - LIMappers[OpenIdx].reset(Edit.get(OpenIdx)); + OpenIdx = Edit->size(); + Edit->create(LIS, VRM); + return OpenIdx; +} + +void SplitEditor::selectIntv(unsigned Idx) { + assert(Idx != 0 && "Cannot select the complement interval"); + assert(Idx < Edit->size() && "Can only select previously opened interval"); + OpenIdx = Idx; } SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { assert(OpenIdx && "openIntv not called before enterIntvBefore"); DEBUG(dbgs() << " enterIntvBefore " << Idx); Idx = Idx.getBaseIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx; @@ -658,14 +641,14 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { SlotIndex End = LIS.getMBBEndIdx(&MBB); SlotIndex Last = End.getPrevSlot(); DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return End; } DEBUG(dbgs() << ": valno " << ParentVNI->id); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, - LIS.getLastSplitPoint(Edit.getParent(), &MBB)); + LIS.getLastSplitPoint(Edit->getParent(), &MBB)); RegAssign.insert(VNI->def, End, OpenIdx); DEBUG(dump()); return VNI->def; @@ -689,7 +672,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { // The interval must be live beyond the instruction at Idx. Idx = Idx.getBoundaryIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx.getNextSlot(); @@ -709,7 +692,7 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { // The interval must be live into the instruction at Idx. 
Idx = Idx.getBoundaryIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx.getNextSlot(); @@ -727,7 +710,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { SlotIndex Start = LIS.getMBBStartIdx(&MBB); DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Start; @@ -742,30 +725,169 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); - assert(Edit.getParent().getVNInfoAt(Start) == - Edit.getParent().getVNInfoAt(End.getPrevSlot()) && + const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && "Parent changes value in extended range"); - assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*"); assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); - // Treat this as useIntv() for now. The complement interval will be extended - // as needed by mapValue(). + // The complement interval will be extended as needed by extendRange(). + if (ParentVNI) + markComplexMapped(0, ParentVNI); DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); RegAssign.insert(Start, End, OpenIdx); DEBUG(dump()); } -/// closeIntv - Indicate that we are done editing the currently open -/// LiveInterval, and ranges can be trimmed. -void SplitEditor::closeIntv() { - assert(OpenIdx && "openIntv not called before closeIntv"); - OpenIdx = 0; +/// transferValues - Transfer all possible values to the new live ranges. +/// Values that were rematerialized are left alone, they need extendRange(). +bool SplitEditor::transferValues() { + bool Skipped = false; + LiveInBlocks.clear(); + RegAssignMap::const_iterator AssignI = RegAssign.begin(); + for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), + ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { + DEBUG(dbgs() << " blit " << *ParentI << ':'); + VNInfo *ParentVNI = ParentI->valno; + // RegAssign has holes where RegIdx 0 should be used. + SlotIndex Start = ParentI->start; + AssignI.advanceTo(Start); + do { + unsigned RegIdx; + SlotIndex End = ParentI->end; + if (!AssignI.valid()) { + RegIdx = 0; + } else if (AssignI.start() <= Start) { + RegIdx = AssignI.value(); + if (AssignI.stop() < End) { + End = AssignI.stop(); + ++AssignI; + } + } else { + RegIdx = 0; + End = std::min(End, AssignI.start()); + } + + // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // Check for a simply defined value that can be blitted directly. + if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) { + DEBUG(dbgs() << ':' << VNI->id); + LI->addRange(LiveRange(Start, End, VNI)); + Start = End; + continue; + } + + // Skip rematerialized values, we need to use extendRange() and + // extendPHIKillRanges() to completely recompute the live ranges. 
+ if (Edit->didRematerialize(ParentVNI)) { + DEBUG(dbgs() << "(remat)"); + Skipped = true; + Start = End; + continue; + } + + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } + + // This value has multiple defs in RegIdx, but it wasn't rematerialized, + // so the live range is accurate. Add live-in blocks in [Start;End) to the + // LiveInBlocks. + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex BlockStart, BlockEnd; + tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + + // The first block may be live-in, or it may have its own def. + if (Start != BlockStart) { + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped value"); + DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + // MBB has its own def. Is it also live-out? + if (BlockEnd <= End) { + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + // Skip to the next block for live-in. + ++MBB; + BlockStart = BlockEnd; + } + + // Handle the live-in blocks covered by [Start;End). + assert(Start <= BlockStart && "Expected live-in block"); + while (BlockStart < End) { + DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + BlockEnd = LIS.getMBBEndIdx(MBB); + if (BlockStart == ParentVNI->def) { + // This block has the def of a parent PHI, so it isn't live-in. + assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped parent PHI"); + if (End >= BlockEnd) { + // Live-out as well. + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + } else { + // This block needs a live-in value. + LiveInBlocks.push_back(MDT[MBB]); + // The last block covered may not be live-out. + if (End < BlockEnd) + LiveInBlocks.back().Kill = End; + else { + // Live-out, but we need updateSSA to tell us the value. + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair((VNInfo*)0, + (MachineDomTreeNode*)0); + } + } + BlockStart = BlockEnd; + ++MBB; + } + Start = End; + } while (Start != ParentI->end); + DEBUG(dbgs() << '\n'); + } + + if (!LiveInBlocks.empty()) + updateSSA(); + + return Skipped; +} + +void SplitEditor::extendPHIKillRanges() { + // Extend live ranges to be live-out for successor PHI values. + for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), + E = Edit->getParent().vni_end(); I != E; ++I) { + const VNInfo *PHIVNI = *I; + if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(PHIVNI->def); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); + // The predecessor may not have a live-out value. That is OK, like an + // undef PHI operand. + if (Edit->getParent().liveAt(End)) { + assert(RegAssign.lookup(End) == RegIdx && + "Different register assignment in phi predecessor"); + extendRange(RegIdx, End); + } + } + } } -/// rewriteAssigned - Rewrite all uses of Edit.getReg(). 
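The do/while loop in transferValues above is effectively a merge of two sorted sequences: the parent interval's segments and the RegAssign map, where any gap in RegAssign implicitly means register index 0, the complement. A minimal sketch of that sweep, with a toy Assign record standing in for the IntervalMap iterator (illustrative only; it assumes the map is sorted and non-overlapping):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// One assignment: [start, stop) mapped to interval index Idx.
struct Assign { uint64_t start, stop; unsigned Idx; };

// Walk a parent segment [Start, End) against the assignment list, emitting
// maximal sub-segments that map to a single index. Gaps between assignments
// map to index 0 (the complement), as in transferValues above.
void blit(const std::vector<Assign> &Map, uint64_t Start, uint64_t End) {
  auto AI = std::find_if(Map.begin(), Map.end(),
                         [&](const Assign &A) { return A.stop > Start; });
  while (Start < End) {
    unsigned Idx;
    uint64_t Stop = End;
    if (AI == Map.end()) {
      Idx = 0;                              // hole to the right: complement
    } else if (AI->start <= Start) {
      Idx = AI->Idx;                        // inside an assignment
      if (AI->stop < End) { Stop = AI->stop; ++AI; }
    } else {
      Idx = 0;                              // hole before the next assignment
      Stop = std::min(End, AI->start);
    }
    std::cout << '[' << Start << ';' << Stop << ")=" << Idx << ' ';
    Start = Stop;
  }
  std::cout << '\n';
}

int main() {
  std::vector<Assign> Map = {{2, 5, 1}, {8, 12, 2}};
  blit(Map, 0, 14); // [0;2)=0 [2;5)=1 [5;8)=0 [8;12)=2 [12;14)=0
}

Each emitted piece [Start;End) is then either blitted directly (simply mapped values) or queued for the SSA update, exactly as the branches above distinguish.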
-void SplitEditor::rewriteAssigned() {
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()),
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
 RE = MRI.reg_end(); RI != RE;) {
 MachineOperand &MO = RI.getOperand();
 MachineInstr *MI = MO.getParent();
@@ -780,147 +902,145 @@ void SplitEditor::rewriteAssigned() {
 // <undef> operands don't really read the register, so just assign them to
 // the complement.
 if (MO.isUse() && MO.isUndef()) {
- MO.setReg(Edit.get(0)->reg);
+ MO.setReg(Edit->get(0)->reg);
 continue;
 }
 SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ if (MO.isDef())
+ Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
 // Rewrite to the mapped register at Idx.
 unsigned RegIdx = RegAssign.lookup(Idx);
- MO.setReg(Edit.get(RegIdx)->reg);
+ MO.setReg(Edit->get(RegIdx)->reg);
 DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
 << Idx << ':' << RegIdx << '\t' << *MI);
- // Extend liveness to Idx.
- const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
- LIMappers[RegIdx].mapValue(ParentVNI, Idx);
+ // Extend liveness to Idx if the instruction reads reg.
+ if (!ExtendRanges)
+ continue;
+
+ // Skip instructions that don't read Reg.
+ if (MO.isDef()) {
+ if (!MO.getSubReg() && !MO.isEarlyClobber())
+ continue;
+ // We may want to extend a live range for a partial redef, or for a use
+ // tied to an early clobber.
+ Idx = Idx.getPrevSlot();
+ if (!Edit->getParent().liveAt(Idx))
+ continue;
+ } else
+ Idx = Idx.getUseIndex();
+
+ extendRange(RegIdx, Idx);
 }
}
-/// rewriteSplit - Rewrite uses of Intvs[0] according to the ConEQ mapping.
-void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
- const ConnectedVNInfoEqClasses &ConEq) {
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg),
- RE = MRI.reg_end(); RI != RE;) {
- MachineOperand &MO = RI.getOperand();
- MachineInstr *MI = MO.getParent();
- ++RI;
- if (MO.isUse() && MO.isUndef())
- continue;
- // DBG_VALUE instructions should have been eliminated earlier.
- SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
- DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
- << Idx << ':');
- const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx);
- assert(VNI && "Interval not live at use.");
- MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg);
- DEBUG(dbgs() << VNI->id << '\t' << *MI);
+void SplitEditor::deleteRematVictims() {
+ SmallVector<MachineInstr*, 8> Dead;
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
+ LiveInterval *LI = *I;
+ for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
+ LII != LIE; ++LII) {
+ // Dead defs end at the store slot.
+ if (LII->end != LII->valno->def.getNextSlot()) + continue; + MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); + assert(MI && "Missing instruction for dead def"); + MI->addRegisterDead(LI->reg, &TRI); + + if (!MI->allDefsAreDead()) + continue; + + DEBUG(dbgs() << "All defs dead: " << *MI); + Dead.push_back(MI); + } } + + if (Dead.empty()) + return; + + Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); } -void SplitEditor::finish() { - assert(OpenIdx == 0 && "Previous LI not closed before rewrite"); +void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { + ++NumFinished; // At this point, the live intervals in Edit contain VNInfos corresponding to // the inserted copies. // Add the original defs from the parent interval. - for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), - E = Edit.getParent().vni_end(); I != E; ++I) { + for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), + E = Edit->getParent().vni_end(); I != E; ++I) { const VNInfo *ParentVNI = *I; if (ParentVNI->isUnused()) continue; - LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)]; - VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def); - LIM.getLI()->addRange(LiveRange(ParentVNI->def, - ParentVNI->def.getNextSlot(), VNI)); - // Mark all values as complex to force liveness computation. - // This should really only be necessary for remat victims, but we are lazy. - LIM.markComplexMapped(ParentVNI); + unsigned RegIdx = RegAssign.lookup(ParentVNI->def); + VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); + VNI->setIsPHIDef(ParentVNI->isPHIDef()); + VNI->setCopy(ParentVNI->getCopy()); + + // Mark rematted values as complex everywhere to force liveness computation. + // The new live ranges may be truncated. + if (Edit->didRematerialize(ParentVNI)) + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + markComplexMapped(i, ParentVNI); } #ifndef NDEBUG // Every new interval must have a def by now, otherwise the split is bogus. - for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); #endif - // FIXME: Don't recompute the liveness of all values, infer it from the - // overlaps between the parent live interval and RegAssign. - // The mapValue algorithm is only necessary when: - // - The parent value maps to multiple defs, and new phis are needed, or - // - The value has been rematerialized before some uses, and we want to - // minimize the live range so it only reaches the remaining uses. - // All other values have simple liveness that can be computed from RegAssign - // and the parent live interval. - - // Extend live ranges to be live-out for successor PHI values. 
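The Values map consulted by defValue and markComplexMapped above (see also the SplitKit.h changes further down) encodes a three-way state per (RegIdx, parent value) pair: absent means unmapped, null means complex mapped, and a non-null entry is a simple 1-1 mapping. A toy sketch of that convention using ordinary standard-library types (illustrative only, not the LLVM types):

#include <iostream>
#include <map>
#include <utility>

// Three-way mapping state, as documented for SplitEditor::Values:
// no entry = unmapped, null = complex (multiple defs), non-null = simple.
int main() {
  std::map<std::pair<unsigned, unsigned>, const int *> Values;
  static const int VNI7 = 7;

  Values[{1, 0}] = &VNI7;    // parent value 0 maps simply into interval 1
  Values[{2, 0}] = nullptr;  // complex mapped in interval 2: force recompute

  for (unsigned RegIdx = 0; RegIdx != 3; ++RegIdx) {
    auto It = Values.find({RegIdx, 0});
    if (It == Values.end())
      std::cout << "interval " << RegIdx << ": unmapped\n";
    else if (!It->second)
      std::cout << "interval " << RegIdx << ": complex\n";
    else
      std::cout << "interval " << RegIdx << ": simple value "
                << *It->second << '\n';
  }
}

This is exactly why finish() marks rematted values as complex in every interval: a null entry forces transferValues to skip them so extendRange can recompute truncated ranges.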
- for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), - E = Edit.getParent().vni_end(); I != E; ++I) { - const VNInfo *PHIVNI = *I; - if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) - continue; - unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveIntervalMap &LIM = LIMappers[RegIdx]; - MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); - DEBUG(dbgs() << " map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def - << " -> " << RegIdx << '\n'); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); - DEBUG(dbgs() << " pred BB#" << (*PI)->getNumber() << '@' << End); - // The predecessor may not have a live-out value. That is OK, like an - // undef PHI operand. - if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) { - DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n"); - assert(RegAssign.lookup(End) == RegIdx && - "Different register assignment in phi predecessor"); - LIM.mapValue(VNI, End); - } - else - DEBUG(dbgs() << " is not live-out\n"); - } - DEBUG(dbgs() << " " << *LIM.getLI() << '\n'); - } + // Transfer the simply mapped values, check if any are skipped. + bool Skipped = transferValues(); + if (Skipped) + extendPHIKillRanges(); + else + ++NumSimple; - // Rewrite instructions. - rewriteAssigned(); + // Rewrite virtual registers, possibly extending ranges. + rewriteAssigned(Skipped); - // FIXME: Delete defs that were rematted everywhere. + // Delete defs that were rematted everywhere. + if (Skipped) + deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) (*I)->RenumberValues(LIS); + // Provide a reverse mapping from original indices to Edit ranges. + if (LRMap) { + LRMap->clear(); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + LRMap->push_back(i); + } + // Now check if any registers were separated into multiple components. ConnectedVNInfoEqClasses ConEQ(LIS); - for (unsigned i = 0, e = Edit.size(); i != e; ++i) { + for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = Edit.get(i); + LiveInterval *li = Edit->get(i); unsigned NumComp = ConEQ.Classify(li); if (NumComp <= 1) continue; DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); SmallVector<LiveInterval*, 8> dups; dups.push_back(li); - for (unsigned i = 1; i != NumComp; ++i) - dups.push_back(&Edit.create(MRI, LIS, VRM)); - rewriteComponents(dups, ConEQ); - ConEQ.Distribute(&dups[0]); + for (unsigned j = 1; j != NumComp; ++j) + dups.push_back(&Edit->create(LIS, VRM)); + ConEQ.Distribute(&dups[0], MRI); + // The new intervals all map back to i. + if (LRMap) + LRMap->resize(Edit->size(), i); } // Calculate spill weight and allocation hints for new intervals. 
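The component check above guards against a subtle problem: after splitting, one register may hold live ranges that no longer touch each other, and leaving them in a single interval would be wrong. ConnectedVNInfoEqClasses::Classify is, conceptually, an equivalence-class computation over the interval's values; a toy union-find sketch of the idea (an illustration of the concept, not LLVM's implementation):

#include <iostream>
#include <numeric>
#include <vector>

// Toy union-find over value numbers, illustrating how connected components
// of a live interval can be classified before Distribute() separates them.
struct EqClasses {
  std::vector<unsigned> Parent;
  explicit EqClasses(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0);
  }
  unsigned find(unsigned V) {
    while (Parent[V] != V) V = Parent[V] = Parent[Parent[V]]; // path halving
    return V;
  }
  void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
};

int main() {
  EqClasses EQ(4);
  EQ.join(0, 1); // values 0 and 1 are connected (e.g. through a phi)
  EQ.join(2, 3); // values 2 and 3 form a second component
  for (unsigned V = 0; V != 4; ++V)
    std::cout << "value " << V << " -> class " << EQ.find(V) << '\n';
}

Distribute() then moves each value's ranges into the interval chosen for its class, which is why the loop above creates NumComp - 1 extra intervals and records them in LRMap.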
- VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops);
- for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){
- LiveInterval &li = **I;
- vrai.CalculateRegClass(li.reg);
- vrai.CalculateWeightAndHint(li);
- DEBUG(dbgs() << " new interval " << MRI.getRegClass(li.reg)->getName()
- << ":" << li << '\n');
- }
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+
+ assert(!LRMap || LRMap->size() == Edit->size());
}
@@ -932,113 +1052,42 @@ void SplitEditor::finish()
/// may be an advantage to split CurLI for the duration of the block.
bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
 // If CurLI is local to one block, there is no point to splitting it.
- if (LiveBlocks.size() <= 1)
+ if (UseBlocks.size() <= 1)
 return false;
 // Add blocks with multiple uses.
- for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) {
- const BlockInfo &BI = LiveBlocks[i];
- if (!BI.Uses)
- continue;
- unsigned Instrs = UsingBlocks.lookup(BI.MBB);
- if (Instrs <= 1)
- continue;
- if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough)
+ for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) {
+ const BlockInfo &BI = UseBlocks[i];
+ if (BI.FirstUse == BI.LastUse)
 continue;
 Blocks.insert(BI.MBB);
 }
 return !Blocks.empty();
}
-/// splitSingleBlocks - Split CurLI into a separate live interval inside each
-/// basic block in Blocks.
-void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
- DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
-
- for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) {
- const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i];
- if (!BI.Uses || !Blocks.count(BI.MBB))
- continue;
-
- openIntv();
- SlotIndex SegStart = enterIntvBefore(BI.FirstUse);
- if (!BI.LiveOut || BI.LastUse < BI.LastSplitPoint) {
- useIntv(SegStart, leaveIntvAfter(BI.LastUse));
- } else {
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
+ openIntv();
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse,
+ LastSplitPoint));
+ if (!BI.LiveOut || BI.LastUse < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+ } else {
 // The last use is after the last valid split point.
- SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
- useIntv(SegStart, SegStop);
- overlapIntv(SegStop, BI.LastUse);
- }
- closeIntv();
+ SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastUse);
 }
- finish();
}
-
-//===----------------------------------------------------------------------===//
-// Sub Block Splitting
-//===----------------------------------------------------------------------===//
-
-/// getBlockForInsideSplit - If CurLI is contained inside a single basic block,
-/// and it would pay to subdivide the interval inside that block, return it.
-/// Otherwise return NULL. The returned block can be passed to
-/// SplitEditor::splitInsideBlock.
-const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
- // The interval must be exclusive to one block.
- if (UsingBlocks.size() != 1)
- return 0;
- // Don't do this for less than 4 instructions. We want to be sure that
- // splitting actually reduces the instruction count per interval.
- if (UsingInstrs.size() < 4)
- return 0;
- return UsingBlocks.begin()->first;
-}
-
-/// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
-void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
- SmallVector<SlotIndex, 32> Uses;
- Uses.reserve(SA.UsingInstrs.size());
- for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(),
- E = SA.UsingInstrs.end(); I != E; ++I)
- if ((*I)->getParent() == MBB)
- Uses.push_back(LIS.getInstructionIndex(*I));
- DEBUG(dbgs() << " splitInsideBlock BB#" << MBB->getNumber() << " for "
- << Uses.size() << " instructions.\n");
- assert(Uses.size() >= 3 && "Need at least 3 instructions");
- array_pod_sort(Uses.begin(), Uses.end());
-
- // Simple algorithm: Find the largest gap between uses as determined by slot
- // indices. Create new intervals for instructions before the gap and after the
- // gap.
- unsigned bestPos = 0;
- int bestGap = 0;
- DEBUG(dbgs() << " dist (" << Uses[0]);
- for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
- int g = Uses[i-1].distance(Uses[i]);
- DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]);
- if (g > bestGap)
- bestPos = i, bestGap = g;
- }
- DEBUG(dbgs() << "), best: -" << bestGap << "-\n");
-
- // bestPos points to the first use after the best gap.
- assert(bestPos > 0 && "Invalid gap");
-
- // FIXME: Don't create intervals for low densities.
-
- // First interval before the gap. Don't create single-instr intervals.
- if (bestPos > 1) {
- openIntv();
- useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1]));
- closeIntv();
- }
-
- // Second interval after the gap.
- if (bestPos < Uses.size()-1) {
- openIntv();
- useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back()));
- closeIntv();
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+ DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ if (Blocks.count(BI.MBB))
+ splitSingleBlock(BI);
 }
- finish();
}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index e02e629..2ae760a 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -12,7 +12,13 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_SPLITKIT_H
+#define LLVM_CODEGEN_SPLITKIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -48,17 +54,9 @@ public:
 const MachineLoopInfo &Loops;
 const TargetInstrInfo &TII;
- // Instructions using the current register.
- typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
- InstrPtrSet UsingInstrs;
-
 // Sorted slot indexes of using instructions.
 SmallVector<SlotIndex, 8> UseSlots;
- // The number of instructions using CurLI in each basic block.
- typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
- BlockCountMap UsingBlocks;
-
 /// Additional information about basic blocks where the current variable is
 /// live. Such a block will look like one of these templates:
 ///
@@ -75,35 +73,37 @@ public:
 SlotIndex LastUse; ///< Last instr using current reg.
 SlotIndex Kill; ///< Interval end point inside block.
 SlotIndex Def; ///< Interval start point inside block.
- /// Last possible point for splitting live ranges.
- SlotIndex LastSplitPoint;
- bool Uses; ///< Current reg has uses or defs in block.
 bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above).
 bool LiveIn; ///< Current reg is live in.
 bool LiveOut; ///< Current reg is live out.
-
- // Per-interference pattern scratch data.
- bool OverlapEntry; ///< Interference overlaps entering interval.
- bool OverlapExit; ///< Interference overlaps exiting interval.
 };
- /// Basic blocks where var is live. This array is parallel to
- /// SpillConstraints.
- SmallVector<BlockInfo, 8> LiveBlocks;
-
private:
 // Current live interval.
 const LiveInterval *CurLI;
+ /// LastSplitPoint - Last legal split point in each basic block in the current
+ /// function. The first entry is the first terminator, the second entry is the
+ /// last valid split point for a variable that is live in to a landing pad
+ /// successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
+
+ /// UseBlocks - Blocks where CurLI has uses.
+ SmallVector<BlockInfo, 8> UseBlocks;
+
+ /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+ BitVector ThroughBlocks;
+
+ /// NumThroughBlocks - Number of live-through blocks.
+ unsigned NumThroughBlocks;
+
+ SlotIndex computeLastSplitPoint(unsigned Num);
+
 // Summarize statistics by counting instructions using CurLI.
 void analyzeUses();
 /// calcLiveBlockInfo - Compute per-block information about CurLI.
- void calcLiveBlockInfo();
-
- /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
- /// analyzed.
- bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+ bool calcLiveBlockInfo();
public:
 SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
@@ -120,9 +120,14 @@ public:
 /// getParent - Return the last analyzed interval.
 const LiveInterval &getParent() const { return *CurLI; }
- /// hasUses - Return true if MBB has any uses of CurLI.
- bool hasUses(const MachineBasicBlock *MBB) const {
- return UsingBlocks.lookup(MBB);
+ /// getLastSplitPoint - Return the base index of the last valid split point
+ /// in the basic block numbered Num.
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ // Inline the common simple case.
+ if (LastSplitPoint[Num].first.isValid() &&
+ !LastSplitPoint[Num].second.isValid())
+ return LastSplitPoint[Num].first;
+ return computeLastSplitPoint(Num);
 }
 /// isOriginalEndpoint - Return true if the original live range was killed or
@@ -132,127 +137,30 @@ public:
 /// splitting.
 bool isOriginalEndpoint(SlotIndex Idx) const;
- typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
-
- // Print a set of blocks with use counts.
- void print(const BlockPtrSet&, raw_ostream&) const;
-
- /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
- /// having CurLI split to a new live interval. Return true if Blocks can be
- /// passed to SplitEditor::splitSingleBlocks.
- bool getMultiUseBlocks(BlockPtrSet &Blocks);
-
- /// getBlockForInsideSplit - If CurLI is contained inside a single basic
- /// block, and it would pay to subdivide the interval inside that block,
- /// return it. Otherwise return NULL. The returned block can be passed to
- /// SplitEditor::splitInsideBlock.
- const MachineBasicBlock *getBlockForInsideSplit();
-};
-
+ /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+ /// where CurLI has uses.
+ ArrayRef<BlockInfo> getUseBlocks() { return UseBlocks; }
-/// LiveIntervalMap - Map values from a large LiveInterval into a small
-/// interval that is a subset. Insert phi-def values as needed. This class is
-/// used by SplitEditor to create new smaller LiveIntervals.
-///
-/// ParentLI is the larger interval, LI is the subset interval. Every value
-/// in LI corresponds to exactly one value in ParentLI, and the live range
-/// of the value is contained within the live range of the ParentLI value.
-/// Values in ParentLI may map to any number of OpenLI values, including 0.
-class LiveIntervalMap {
- LiveIntervals &LIS;
- MachineDominatorTree &MDT;
+ /// getNumThroughBlocks - Return the number of through blocks.
+ unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
- // The parent interval is never changed.
- const LiveInterval &ParentLI;
+ /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+ bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
- // The child interval's values are fully contained inside ParentLI values.
- LiveInterval *LI;
+ /// getThroughBlocks - Return the set of through blocks.
+ const BitVector &getThroughBlocks() const { return ThroughBlocks; }
- typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
+ /// countLiveBlocks - Return the number of blocks where li is live.
+ /// This is guaranteed to return the same number as getNumThroughBlocks() +
+ /// getUseBlocks().size() after calling analyze(li).
+ unsigned countLiveBlocks(const LiveInterval *li) const;
- // Map ParentLI values to simple values in LI that are defined at the same
- // SlotIndex, or NULL for ParentLI values that have complex LI defs.
- // Note there is a difference between values mapping to NULL (complex), and
- // values not present (unknown/unmapped).
- ValueMap Values;
-
- typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
- typedef DenseMap<MachineBasicBlock*,LiveOutPair> LiveOutMap;
-
- // LiveOutCache - Map each basic block where LI is live out to the live-out
- // value and its defining block. One of these conditions shall be true:
- //
- // 1. !LiveOutCache.count(MBB)
- // 2. LiveOutCache[MBB].second.getNode() == MBB
- // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
- //
- // This is only a cache, the values can be computed as:
- //
- // VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB))
- // Node = mbt_[LIS.getMBBFromIndex(VNI->def)]
- //
- // The cache is also used as a visited set by mapValue().
- LiveOutMap LiveOutCache;
-
- // Dump the live-out cache to dbgs().
- void dumpCache();
-
-public:
- LiveIntervalMap(LiveIntervals &lis,
- MachineDominatorTree &mdt,
- const LiveInterval &parentli)
- : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {}
-
- /// reset - clear all data structures and start a new live interval.
- void reset(LiveInterval *);
-
- /// getLI - return the current live interval.
- LiveInterval *getLI() const { return LI; }
-
- /// defValue - define a value in LI from the ParentLI value VNI and Idx.
- /// Idx does not have to be ParentVNI->def, but it must be contained within
- /// ParentVNI's live range in ParentLI.
- /// Return the new LI value.
- VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
-
- /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is
- /// assumed that ParentVNI is live at Idx.
- /// If ParentVNI has not been defined by defValue, it is assumed that
- /// ParentVNI->def dominates Idx.
- /// If ParentVNI has been defined by defValue one or more times, a value that
- /// dominates Idx will be returned. This may require creating extra phi-def
- /// values and adding live ranges to LI.
- /// If simple is not NULL, *simple will indicate if ParentVNI is a simply
- /// mapped value.
- VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0);
-
- // extendTo - Find the last LI value defined in MBB at or before Idx. The
- // parentli is assumed to be live at Idx. Extend the live range to include
- // Idx. Return the found VNInfo, or NULL.
- VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx);
-
- /// isMapped - Return true if ParentVNI is a known mapped value. It may be a
- /// simple 1-1 mapping or a complex mapping to later defs.
- bool isMapped(const VNInfo *ParentVNI) const {
- return Values.count(ParentVNI);
- }
-
- /// isComplexMapped - Return true if ParentVNI has received new definitions
- /// with defValue.
- bool isComplexMapped(const VNInfo *ParentVNI) const;
-
- /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the
- /// number of definitions.
- void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; }
-
- // addSimpleRange - Add a simple range from ParentLI to LI.
- // ParentVNI must be live in the [Start;End) interval.
- void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
- /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
- /// All needed values whose def is not inside [Start;End) must be defined
- /// beforehand so mapValue will work.
- void addRange(SlotIndex Start, SlotIndex End);
+ /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+ /// having CurLI split to a new live interval. Return true if Blocks can be
+ /// passed to SplitEditor::splitSingleBlocks.
+ bool getMultiUseBlocks(BlockPtrSet &Blocks);
};
@@ -277,7 +185,7 @@ class SplitEditor {
 const TargetRegisterInfo &TRI;
 /// Edit - The current parent register and new intervals created.
- LiveRangeEdit &Edit;
+ LiveRangeEdit *Edit;
 /// Index into Edit of the currently open interval.
 /// The index 0 is used for the complement, so the first interval started by
@@ -295,8 +203,76 @@ class SplitEditor {
 /// Idx.
 RegAssignMap RegAssign;
- /// LIMappers - One LiveIntervalMap for each interval in Edit.
- SmallVector<LiveIntervalMap, 4> LIMappers;
+ typedef DenseMap<std::pair<unsigned, unsigned>, VNInfo*> ValueMap;
+
+ /// Values - keep track of the mapping from parent values to values in the new
+ /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+ ///
+ /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+ /// 2. Null - the value is mapped to multiple values in Edit.get(RegIdx).
+ /// Each value is represented by a minimal live range at its def.
+ /// 3. A non-null VNInfo - the value is mapped to a single new value.
+ /// The new value has no live ranges anywhere.
+ ValueMap Values;
+
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+ typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+ // LiveOutCache - Map each basic block where a new register is live out to the
+ // live-out value and its defining block.
+ // One of these conditions shall be true:
+ //
+ // 1. !LiveOutCache.count(MBB)
+ // 2. LiveOutCache[MBB].second.getNode() == MBB
+ // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
+ //
+ // This is only a cache, the values can be computed as:
+ //
+ // VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB))
+ // Node = mbt_[LIS.getMBBFromIndex(VNI->def)]
+ //
+ // The cache is also used as a visited set by extendRange(). It can be shared
+ // by all the new registers because at most one is live out of each block.
+ LiveOutMap LiveOutCache;
+
+ // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid
+ // entry in LiveOutCache.
+ BitVector LiveOutSeen;
+
+ /// LiveInBlock - Info for updateSSA() about a block where a register is
+ /// live-in.
+ /// The updateSSA caller provides DomNode and Kill inside MBB, updateSSA()
+ /// adds the computed live-in value.
+ struct LiveInBlock {
+ // Dominator tree node for the block.
+ // Cleared by updateSSA when the final value has been determined.
+ MachineDomTreeNode *DomNode;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block.
+ SlotIndex Kill;
+
+ LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {}
+ };
+
+ /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and
+ /// updateSSA(). This list is usually empty, it exists here to avoid frequent
+ /// reallocations.
+ SmallVector<LiveInBlock, 16> LiveInBlocks;
+
+ /// defValue - define a value in RegIdx from ParentVNI at Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in ParentLI. The new value is added to the value
+ /// map.
+ /// Return the new LI value.
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless
+ /// of the number of defs.
+ void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI);
 /// defFromParent - Define Reg from ParentVNI at UseIdx using either
 /// rematerialization or a COPY from parent. Return the new value.
@@ -306,27 +282,56 @@ class SplitEditor {
 MachineBasicBlock &MBB,
 MachineBasicBlock::iterator I);
+ /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx.
+ /// Insert PHIDefs as needed to preserve SSA form.
+ void extendRange(unsigned RegIdx, SlotIndex Idx);
+
+ /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all
+ /// reaching defs for LI are found.
+ /// @param LI Live interval whose value is needed.
+ /// @param MBB Block where LI should be live-in.
+ /// @param Kill Kill point in MBB.
+ /// @return Unique value seen, or NULL.
+ VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB,
+ SlotIndex Kill);
+
+ /// updateSSA - Compute and insert PHIDefs such that all blocks in
+ // LiveInBlocks get a known live-in value. Add live ranges to the blocks.
+ void updateSSA();
+
+ /// transferValues - Transfer values to the new ranges.
+ /// Return true if any ranges were skipped.
+ bool transferValues();
+
+ /// extendPHIKillRanges - Extend the ranges of all values killed by original
+ /// parent PHIDefs.
+ void extendPHIKillRanges();
+
 /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
- void rewriteAssigned();
+ void rewriteAssigned(bool ExtendRanges);
- /// rewriteComponents - Rewrite all uses of Intv[0] according to the eq
- /// classes in ConEQ.
- /// This must be done when Intvs[0] is still live at all uses, before calling
- /// ConEq.Distribute().
- void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
- const ConnectedVNInfoEqClasses &ConEq);
+ /// deleteRematVictims - Delete defs that are dead after rematerializing.
+ void deleteRematVictims();
public:
 /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
 /// Newly created intervals will be appended to newIntervals.
 SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
- MachineDominatorTree&, LiveRangeEdit&);
+ MachineDominatorTree&);
- /// getAnalysis - Get the corresponding analysis.
- SplitAnalysis &getAnalysis() { return SA; }
+ /// reset - Prepare for a new split.
+ void reset(LiveRangeEdit&);
 /// Create a new virtual register and live interval.
- void openIntv();
+ /// Return the interval index, starting from 1. Interval index 0 is the
+ /// implicit complement interval.
+ unsigned openIntv();
+
+ /// currentIntv - Return the current interval index.
+ unsigned currentIntv() const { return OpenIdx; }
+
+ /// selectIntv - Select a previously opened interval index.
+ void selectIntv(unsigned Idx);
 /// enterIntvBefore - Enter the open interval before the instruction at Idx.
 /// If the parent interval is not live before Idx, a COPY is not inserted.
@@ -369,25 +374,28 @@ public:
 ///
 void overlapIntv(SlotIndex Start, SlotIndex End);
- /// closeIntv - Indicate that we are done editing the currently open
- /// LiveInterval, and ranges can be trimmed.
- void closeIntv();
-
 /// finish - after all the new live ranges have been created, compute the
 /// remaining live range, and rewrite instructions to use the new registers.
- void finish();
+ /// @param LRMap When not null, this vector will map each live range in Edit
+ /// back to the indices returned by openIntv.
+ /// There may be extra indices created by dead code elimination.
+ void finish(SmallVectorImpl<unsigned> *LRMap = 0);
 /// dump - print the current interval mapping to dbgs().
 void dump() const;
 // ===--- High level methods ---===
+ /// splitSingleBlock - Split CurLI into a separate live interval around the
+ /// uses in a single block. This is intended to be used as part of a larger
+ /// split, and doesn't call finish().
+ void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
 /// splitSingleBlocks - Split CurLI into a separate live interval inside each
 /// basic block in Blocks.
 void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
-
- /// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
- void splitInsideBlock(const MachineBasicBlock *);
};
}
+
+#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index fcaee42..f0a44ab 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -153,7 +153,6 @@ bool StackProtector::InsertStackProtectors() {
 for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
 BasicBlock *BB = I++;
-
 ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
 if (!RI) continue;
@@ -191,8 +190,6 @@ bool StackProtector::InsertStackProtectors() {
 // Create the basic block to jump to when the guard check fails.
 FailBB = CreateFailBB();
- if (DT)
- FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0;
 }
 // For each block with a return instruction, convert this:
@@ -219,9 +216,10 @@ bool StackProtector::InsertStackProtectors() {
 // Split the basic block before the return instruction.
 BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
- if (DT) {
- DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0);
- FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB);
+
+ if (DT && DT->isReachableFromEntry(BB)) {
+ DT->addNewBlock(NewBB, BB);
+ FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB;
 }
 // Remove default branch instruction to the new BB.
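The StackProtector change above maintains FailBBDom as a running nearest common dominator of every return block it splits, so that the failure block can finally be attached at a single point dominating all of the checks. A toy sketch of that folding over an explicit parent/depth tree (hypothetical Tree type; the real DominatorTree API differs):

#include <iostream>
#include <utility>
#include <vector>

// Toy dominator-style tree: Parent[v] is v's immediate dominator, Depth[v]
// its depth. ncd walks the deeper node upward until the two nodes meet,
// which is the accumulation the FailBBDom update above relies on.
struct Tree {
  std::vector<int> Parent, Depth;
  int ncd(int A, int B) const {
    while (A != B) {
      if (Depth[A] < Depth[B]) std::swap(A, B);
      A = Parent[A]; // step the deeper node toward the root
    }
    return A;
  }
};

int main() {
  // Tree shape: 0 is the root; 1 and 2 are children of 0; 3 and 4 of 1.
  Tree T{{0, 0, 0, 1, 1}, {0, 1, 1, 2, 2}};
  int FailBBDom = -1; // "no block seen yet"
  for (int BB : {3, 4, 2}) // blocks with return instructions
    FailBBDom = FailBBDom < 0 ? BB : T.ncd(FailBBDom, BB);
  std::cout << "FailBB dominator: " << FailBBDom << '\n'; // prints 0
}

With this folding, one insertion point stays valid no matter how many return blocks were split, which matches the final "if (DT && FailBBDom)" guard in the next hunk.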
@@ -242,7 +240,7 @@ bool StackProtector::InsertStackProtectors() {
 // statements in the function.
 if (!FailBB) return false;
- if (DT)
+ if (DT && FailBBDom)
 DT->addNewBlock(FailBB, FailBBDom);
 return true;
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index ec7829e..227eb47 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -587,7 +587,7 @@ StrongPHIElimination::SplitInterferencesForBasicBlock(
 }
 // We now walk the PHIs in successor blocks and check for interferences. This
- // is necesary because the use of a PHI's operands are logically contained in
+ // is necessary because the use of a PHI's operands are logically contained in
 // the predecessor block. The def of a PHI's destination register is processed
 // along with the other defs in a basic block.
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 15340a3..b9fcd38 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -388,11 +388,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
 if (MO.isDef() != (i == 0))
 return false;
- // For the def, it should be the only def of that register.
- if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() ||
- MRI.isLiveIn(Reg)))
- return false;
-
 // Don't allow any virtual-register uses. Rematting an instruction with
 // virtual register uses would lengthen the live ranges of the uses, which
 // is not necessarily a good idea, certainly not "trivial".
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index fa311dc..6ed91b0 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
@@ -176,6 +177,52 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
 SectionKind::getDataRel());
}
+MCSymbol *
+TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
+ Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ unsigned Encoding = getPersonalityEncoding();
+ switch (Encoding & 0x70) {
+ default:
+ report_fatal_error("We do not support this DWARF encoding yet!");
+ case dwarf::DW_EH_PE_absptr:
+ return Mang->getSymbol(GV);
+ break;
+ case dwarf::DW_EH_PE_pcrel: {
+ Twine FullName = StringRef("DW.ref.") + Mang->getSymbol(GV)->getName();
+ return getContext().GetOrCreateSymbol(FullName);
+ break;
+ }
+ }
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+ Twine FullName = StringRef("DW.ref.") + Sym->getName();
+ MCSymbol *Label = getContext().GetOrCreateSymbol(FullName);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+ Twine SectionName = StringRef(".data.") + Label->getName();
+ SmallString<64> NameData;
+ SectionName.toVector(NameData);
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+ const MCSection *Sec = getContext().getELFSection(NameData,
+ ELF::SHT_PROGBITS,
+ Flags,
+ SectionKind::getDataRel(),
+ 0, Label->getName());
+ Streamer.SwitchSection(Sec);
+ Streamer.EmitValueToAlignment(8);
+ Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ const MCExpr *E = MCConstantExpr::Create(8, getContext());
+ Streamer.EmitELFSize(Label, E);
+ Streamer.EmitLabel(Label);
+
+ unsigned Size = TM.getTargetData()->getPointerSize();
+ Streamer.EmitSymbolValue(Sym, Size);
+}
+
static SectionKind
getELFKindForNamedSection(StringRef Name, SectionKind K) {
 // FIXME: Why is this here? Codegen should not be in the business
@@ -424,8 +471,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
 }
 return TargetLoweringObjectFile::
- getExprForDwarfReference(SSym, Mang, MMI,
- Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
 }
 return TargetLoweringObjectFile::
@@ -446,18 +492,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
 IsFunctionEHFrameSymbolPrivate = false;
 SupportsWeakOmittedEHFrame = false;
+ // .comm doesn't support alignment before Leopard.
 Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
- if (T.getOS() == Triple::Darwin) {
- switch (T.getDarwinMajorNumber()) {
- case 7: // 10.3 Panther.
- case 8: // 10.4 Tiger.
- CommDirectiveSupportsAlignment = false;
- break;
- case 9: // 10.5 Leopard.
- case 10: // 10.6 SnowLeopard.
- break;
- }
- }
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
+ CommDirectiveSupportsAlignment = false;
 TargetLoweringObjectFile::Initialize(Ctx, TM);
@@ -641,10 +679,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
 Mangler *Mang, const TargetMachine &TM) const {
 // Parse the section specifier and create it if valid.
 StringRef Segment, Section;
- unsigned TAA = (unsigned)MCSectionMachO::SECTION_ATTRIBUTES, StubSize = 0;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
 std::string ErrorCode =
 MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
- TAA, StubSize);
+ TAA, TAAParsed, StubSize);
 if (!ErrorCode.empty()) {
 // If invalid, report the error with report_fatal_error.
 report_fatal_error("Global variable '" + GV->getNameStr() +
@@ -654,17 +693,13 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
 return DataSection;
 }
- bool TAAWasSet = (TAA != MCSectionMachO::SECTION_ATTRIBUTES);
- if (!TAAWasSet)
- TAA = 0; // Sensible default if this is a new section.
-
 // Get the section.
 const MCSectionMachO *S =
 getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
 // If TAA wasn't set by ParseSectionSpecifier() above,
 // use the value returned by getMachOSection() as a default.
- if (!TAAWasSet)
+ if (!TAAParsed)
 TAA = S->getTypeAndAttributes();
 // Okay, now that we got the section, verify that the TAA & StubSize agree.
@@ -806,14 +841,36 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
 }
 return TargetLoweringObjectFile::
- getExprForDwarfReference(SSym, Mang, MMI,
- Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
 }
 return TargetLoweringObjectFile::
 getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
+MCSymbol *TargetLoweringObjectFileMachO::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ SmallString<128> Name;
+ Mang->getNameWithPrefix(Name, GV, true);
+ Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (StubSym.getPointer() == 0) {
+ MCSymbol *Sym = Mang->getSymbol(GV);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
 return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
}
@@ -822,7 +879,7 @@ unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const {
 return DW_EH_PE_pcrel;
}
-unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const {
+unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const {
 return DW_EH_PE_pcrel;
}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index b3120b8..52ea872 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -105,7 +105,7 @@ namespace {
 MachineFunction::iterator &mbbi,
 unsigned RegB, unsigned RegC, unsigned Dist);
- bool isProfitableToConv3Addr(unsigned RegA);
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
 bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
 MachineBasicBlock::iterator &nmi,
@@ -124,7 +124,11 @@ namespace {
 MachineBasicBlock::iterator &nmi,
 MachineFunction::iterator &mbbi,
 unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist);
+ unsigned Dist,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
+
+ void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed);
 void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
 SmallPtrSet<MachineInstr*, 8> &Processed);
@@ -615,16 +619,18 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
/// isProfitableToConv3Addr - Return true if it is profitable to convert the
/// given 2-address instruction to a 3-address one.
bool
-TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) {
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
 // Look for situations like this:
 // %reg1024<def> = MOV r1
 // %reg1025<def> = MOV r0
 // %reg1026<def> = ADD %reg1024, %reg1025
 // r2 = MOV %reg1026
 // Turn ADD into a 3-address instruction to avoid a copy.
- unsigned FromRegA = getMappedReg(RegA, SrcRegMap);
+ unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ if (!FromRegB)
+ return false;
 unsigned ToRegA = getMappedReg(RegA, DstRegMap);
- return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI));
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
}
/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
@@ -664,6 +670,54 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
 return false;
}
+/// ScanUses - Scan forward recursively for only uses, update maps if the use
+/// is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsDstPhys;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ unsigned Reg = DstReg;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
+ NewReg, IsDstPhys)) {
+ if (IsCopy && !Processed.insert(UseMI))
+ break;
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+ // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ VirtRegPairs.push_back(NewReg);
+ Reg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg && "Can't map to two dst registers!");
+ ToReg = FromReg;
+ }
+ bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+ }
+}
+
/// ProcessCopy - If the specified instruction is not yet processed, process it
/// if it's a copy. For a copy instruction, we find the physical registers the
/// source and destination registers might be mapped to. These are kept in
@@ -695,49 +749,11 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
 assert(SrcRegMap[DstReg] == SrcReg &&
 "Can't map to two src physical registers!");
- SmallVector<unsigned, 4> VirtRegPairs;
- bool IsCopy = false;
- unsigned NewReg = 0;
- while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII,
- IsCopy, NewReg, IsDstPhys)) {
- if (IsCopy) {
- if (!Processed.insert(UseMI))
- break;
- }
-
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
- if (DI != DistanceMap.end())
- // Earlier in the same MBB. Reached via a back edge.
- break;
-
- if (IsDstPhys) {
- VirtRegPairs.push_back(NewReg);
- break;
- }
- bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second;
- if (!isNew)
- assert(SrcRegMap[NewReg] == DstReg &&
- "Can't map to two src physical registers!");
- VirtRegPairs.push_back(NewReg);
- DstReg = NewReg;
- }
-
- if (!VirtRegPairs.empty()) {
- unsigned ToReg = VirtRegPairs.back();
- VirtRegPairs.pop_back();
- while (!VirtRegPairs.empty()) {
- unsigned FromReg = VirtRegPairs.back();
- VirtRegPairs.pop_back();
- bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
- if (!isNew)
- assert(DstRegMap[FromReg] == ToReg &&
- "Can't map to two dst physical registers!");
- ToReg = FromReg;
- }
- }
+ ScanUses(DstReg, MBB, Processed);
 }
 Processed.insert(MI);
+ return;
}
/// isSafeToDelete - If the specified instruction does not produce any side
@@ -836,7 +852,8 @@ bool TwoAddressInstructionPass::
TryInstructionTransform(MachineBasicBlock::iterator &mi,
 MachineBasicBlock::iterator &nmi,
 MachineFunction::iterator &mbbi,
- unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
+ unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
+ SmallPtrSet<MachineInstr*, 8> &Processed) {
 const TargetInstrDesc &TID = mi->getDesc();
 unsigned regA = mi->getOperand(DstIdx).getReg();
 unsigned regB = mi->getOperand(SrcIdx).getReg();
@@ -887,10 +904,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
 return false;
 }
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ ScanUses(regA, &*mbbi, Processed);
+
 if (TID.isConvertibleTo3Addr()) {
 // This instruction is potentially convertible to a true
 // three-address instruction. Check if it is profitable.
- if (!regBKilled || isProfitableToConv3Addr(regA)) {
+ if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
 // Try to convert it.
if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { ++NumConvertedTo3Addr; @@ -951,7 +971,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformSuccess = TryInstructionTransform(NewMI, mi, mbbi, - NewSrcIdx, NewDstIdx, Dist); + NewSrcIdx, NewDstIdx, Dist, Processed); if (TransformSuccess || NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. Keep the unfolded @@ -1100,7 +1120,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi->getOperand(DstIdx).getReg()) break; // Done with this instruction. - if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, + Processed)) break; // The tied operands have been eliminated. } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 734b87e..226b78f 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -259,7 +259,9 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " << MF->getFunction()->getName() << '\n'); - + DEBUG(dump()); + SmallVector<unsigned, 8> SuperDeads; + SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); @@ -283,12 +285,13 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (MO.getSubReg()) { // A virtual register kill refers to the whole register, so we may // have to add <imp-use,kill> operands for the super-register. - if (MO.isUse() && MO.isKill() && !MO.isUndef()) - SuperKills.push_back(PhysReg); - - // We don't have to deal with sub-register defs because - // LiveIntervalAnalysis already added the necessary <imp-def> - // operands. + if (MO.isUse()) { + if (MO.isKill() && !MO.isUndef()) + SuperKills.push_back(PhysReg); + } else if (MO.isDead()) + SuperDeads.push_back(PhysReg); + else + SuperDefs.push_back(PhysReg); // PhysReg operands cannot have subregister indexes. PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); @@ -305,16 +308,28 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { while (!SuperKills.empty()) MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + while (!SuperDeads.empty()) + MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true); + + while (!SuperDefs.empty()) + MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI); + DEBUG(dbgs() << "> " << *MI); // Finally, remove any identity copies. if (MI->isIdentityCopy()) { - DEBUG(dbgs() << "Deleting identity copy.\n"); - RemoveMachineInstrFromMaps(MI); - if (Indexes) - Indexes->removeMachineInstrFromMaps(MI); - // It's safe to erase MI because MII has already been incremented. - MI->eraseFromParent(); + if (MI->getNumOperands() == 2) { + DEBUG(dbgs() << "Deleting identity copy.\n"); + RemoveMachineInstrFromMaps(MI); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. + MI->eraseFromParent(); + } else { + // Transform identity copy to a KILL to deal with subregisters. 
+ MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "Identity copy: " << *MI); + } } } } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index ec149dd..1850658 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -32,7 +32,7 @@ STATISTIC(NumCommutes, "Number of instructions commuted"); STATISTIC(NumDRM , "Number of re-materializable defs elided"); STATISTIC(NumStores , "Number of stores added"); STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omited"); +STATISTIC(NumOmitted , "Number of reloads omitted"); STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); STATISTIC(NumCopified, "Number of available reloads turned into copies"); STATISTIC(NumReMats , "Number of re-materialization"); @@ -261,6 +261,10 @@ public: /// now). void ModifyStackSlotOrReMat(int SlotOrReMat); + /// ClobberSharingStackSlots - When a register mapped to a stack slot changes, + /// other stack slots sharing the same register are no longer valid. + void ClobberSharingStackSlots(int StackSlot); + /// AddAvailableRegsToLiveIn - Availability information is being kept coming /// into the specified MBB. Add available physical registers as potential /// live-in's. If they are reused in the MBB, they will be added to the @@ -665,7 +669,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, } } -/// ReMaterialize - Re-materialize definition for Reg targetting DestReg. +/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. /// static void ReMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MII, @@ -831,6 +835,26 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { PhysRegsAvailable.erase(I); } +void AvailableSpills::ClobberSharingStackSlots(int StackSlot) { + std::map<int, unsigned>::iterator It = + SpillSlotsOrReMatsAvailable.find(StackSlot); + if (It == SpillSlotsOrReMatsAvailable.end()) return; + unsigned Reg = It->second >> 1; + + // Erase entries in PhysRegsAvailable for other stack slots. + std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); + while (I != PhysRegsAvailable.end() && I->first == Reg) { + std::multimap<unsigned, int>::iterator NextI = llvm::next(I); + if (I->second != StackSlot) { + DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in " + << PrintReg(Reg, TRI) << '\n'); + SpillSlotsOrReMatsAvailable.erase(I->second); + PhysRegsAvailable.erase(I); + } + I = NextI; + } +} + // ************************** // // Reuse Info Implementation // // ************************** // @@ -1791,8 +1815,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, else DEBUG(dbgs() << "Reusing SS#" << SSorRMId); DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " + << TRI->getName(InReg) << " for " << PrintReg(VirtReg) + <<" instead of reloading into physreg " << TRI->getName(Phys) << '\n'); // Reusing a physreg may resurrect it. 
But we expect ProcessUses to update @@ -1807,8 +1831,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, else DEBUG(dbgs() << "Reusing SS#" << SSorRMId); DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" by copying it into physreg " + << TRI->getName(InReg) << " for " << PrintReg(VirtReg) + <<" by copying it into physreg " << TRI->getName(Phys) << '\n'); // If the reloaded / remat value is available in another register, @@ -2025,7 +2049,8 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, TRI->regsOverlap(MOk.getReg(), PhysReg)) { CanReuse = false; DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg << ": " << MOk << '\n'); + << " for " << PrintReg(VirtReg) << ": " << MOk + << '\n'); break; } } @@ -2039,9 +2064,9 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, else DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(VRM->getPhys(VirtReg)) << '\n'); + << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg) + << " instead of reloading into " + << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n'); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -2126,7 +2151,7 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, else DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg + << " for " << PrintReg(VirtReg) << " instead of reloading into same physreg.\n"); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); @@ -2315,7 +2340,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { unsigned VirtReg = FoldedVirts[FVI].first; VirtRegMap::ModRef MR = FoldedVirts[FVI].second; - DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR); + DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR); int SS = VRM->getStackSlot(VirtReg); if (SS == VirtRegMap::NO_STACK_SLOT) @@ -2549,6 +2574,10 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, } } + // If StackSlot is available in a register that also holds other stack + // slots, clobber those stack slots now. + Spills.ClobberSharingStackSlots(StackSlot); + assert(PhysReg && "VR not assigned a physical register?"); MRI->setPhysRegUsed(PhysReg); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; diff --git a/lib/CompilerDriver/CMakeLists.txt b/lib/CompilerDriver/CMakeLists.txt index 2248de0..a12b337 100644 --- a/lib/CompilerDriver/CMakeLists.txt +++ b/lib/CompilerDriver/CMakeLists.txt @@ -1,10 +1,12 @@ set(LLVM_LINK_COMPONENTS support) -set(LLVM_REQUIRES_EH 1) -add_llvm_tool(llvmc +# We don't want this library to appear in `llvm-config --libs` output, +# so its name doesn't start with "LLVM". 
+
+add_llvm_library(CompilerDriver
   Action.cpp
+  BuiltinOptions.cpp
   CompilationGraph.cpp
-  llvmc.cpp
-  Plugin.cpp
+  Main.cpp
   Tool.cpp
   )
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index b5632d2..8bff265 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -6,3 +6,4 @@ add_llvm_library(LLVMExecutionEngine
 add_subdirectory(Interpreter)
 add_subdirectory(JIT)
 add_subdirectory(MCJIT)
+add_subdirectory(RuntimeDyld)
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index f286975..2b1e878 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -79,9 +79,10 @@ ExecutionEngine::~ExecutionEngine() {
 void ExecutionEngine::DeregisterAllTables() {
   if (ExceptionTableDeregister) {
-    for (std::vector<void*>::iterator it = AllExceptionTables.begin(),
-           ie = AllExceptionTables.end(); it != ie; ++it)
-      ExceptionTableDeregister(*it);
+    DenseMap<const Function*, void*>::iterator it = AllExceptionTables.begin();
+    DenseMap<const Function*, void*>::iterator ite = AllExceptionTables.end();
+    for (; it != ite; ++it)
+      ExceptionTableDeregister(it->second);
     AllExceptionTables.clear();
   }
 }
@@ -310,19 +311,19 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
   // it.
   if (!GV || GV->isDeclaration() || GV->hasLocalLinkage())
     return;
-  // Should be an array of '{ int, void ()* }' structs. The first value is
+  // Should be an array of '{ i32, void ()* }' structs. The first value is
   // the init priority, which we ignore.
-  ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-  if (!InitList) return;
+  if (isa<ConstantAggregateZero>(GV->getInitializer()))
+    return;
+  ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-    ConstantStruct *CS =
-      dyn_cast<ConstantStruct>(InitList->getOperand(i));
-    if (!CS) continue;
-    if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+    if (isa<ConstantAggregateZero>(InitList->getOperand(i)))
+      continue;
+    ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
     Constant *FP = CS->getOperand(1);
     if (FP->isNullValue())
-      break; // Found a null terminator, exit.
+      continue; // Found a sentinel value, ignore.

     // Strip off constant expression casts.
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
@@ -838,7 +839,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
   case Type::PointerTyID:
     // Ensure 64 bit target pointers are fully initialized on 32 bit hosts.
     if (StoreBytes != sizeof(PointerTy))
-      memset(Ptr, 0, StoreBytes);
+      memset(&(Ptr->PointerVal), 0, StoreBytes);
     *((PointerTy*)Ptr) = Val.PointerVal;
     break;
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
index 169e1ba..fa8bee4 100644
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -52,8 +52,8 @@ static void runAtExitHandlers() {
 #include <sys/stat.h>
 #endif
 #include <fcntl.h>
-/* stat functions are redirecting to __xstat with a version number. On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
 * available as an exported symbol, so we have to add it explicitly.
*/ namespace { @@ -119,18 +119,18 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, const char *NameStr = Name.c_str(); // If this is an asm specifier, skip the sentinal. if (NameStr[0] == 1) ++NameStr; - + // If it's an external function, look it up in the process image... void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); if (Ptr) return Ptr; - + // If it wasn't found and if it starts with an underscore ('_') character, // and has an asm specifier, try again without the underscore. if (Name[0] == 1 && NameStr[0] == '_') { Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); if (Ptr) return Ptr; } - + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These // are references to hidden visibility symbols that dlsym cannot resolve. // If we have one of these, strip off $LDBLStub and try again. @@ -147,7 +147,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, } #endif } - + /// If a LazyFunctionCreator is installed, use it to get/create the function. if (LazyFunctionCreator) if (void *RP = LazyFunctionCreator(Name)) diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index cc76b13..d1f87ac 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -35,7 +35,7 @@ using namespace llvm; -#ifdef __APPLE__ +#ifdef __APPLE__ // Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead // of atexit). It passes the address of linker generated symbol __dso_handle // to the function. @@ -75,7 +75,7 @@ extern "C" void LLVMLinkInJIT() { #endif #if HAVE_EHTABLE_SUPPORT - + // libgcc defines the __register_frame function to dynamically register new // dwarf frames for exception handling. This functionality is not portable // across compilers and is only provided by GCC. We use the __register_frame @@ -113,10 +113,10 @@ struct LibgccObject { void *unused1; void *unused2; void *unused3; - + /// frame - Pointer to the exception table. void *frame; - + /// encoding - The encoding of the object? union { struct { @@ -124,15 +124,15 @@ struct LibgccObject { unsigned long from_array : 1; unsigned long mixed_encoding : 1; unsigned long encoding : 8; - unsigned long count : 21; + unsigned long count : 21; } b; size_t i; } encoding; - + /// fde_end - libgcc defines this field only if some macro is defined. We /// include this field even if it may not there, to make libgcc happy. char *fde_end; - + /// next - At least we know it's a chained list! struct LibgccObject *next; }; @@ -153,7 +153,7 @@ struct LibgccObjectInfo { /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime. /// struct LibgccObject* unseenObjects; - + unsigned unused[2]; }; @@ -165,32 +165,32 @@ void DarwinRegisterFrame(void* FrameBegin) { LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); assert(LOI && "This should be preallocated by the runtime"); - + // Allocate a new LibgccObject to represent this frame. Deallocation of this // object may be impossible: since darwin code in libgcc was written after // the ability to dynamically register frames, things may crash if we // deallocate it. struct LibgccObject* ob = (struct LibgccObject*) malloc(sizeof(struct LibgccObject)); - + // Do like libgcc for the values of the field. 
ob->unused1 = (void *)-1; ob->unused2 = 0; ob->unused3 = 0; ob->frame = FrameBegin; - ob->encoding.i = 0; + ob->encoding.i = 0; ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit; - + // Put the info on both places, as libgcc uses the first or the second // field. Note that we rely on having two pointers here. If fde_end was a // char, things would get complicated. ob->fde_end = (char*)LOI->unseenObjects; ob->next = LOI->unseenObjects; - + // Update the key's unseenObjects list. LOI->unseenObjects = ob; - - // Finally update the "key". Apparently, libgcc requires it. + + // Finally update the "key". Apparently, libgcc requires it. _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI); @@ -312,18 +312,18 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) { report_fatal_error("Target does not support machine code emission!"); } - + // Register routine for informing unwinding runtime about new EH frames #if HAVE_EHTABLE_SUPPORT #if USE_KEYMGR struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); - + // The key is created on demand, and libgcc creates it the first time an // exception occurs. Since we need the key to register frames, we create // it now. if (!LOI) - LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1); + LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1); _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI); InstallExceptionTableRegister(DarwinRegisterFrame); // Not sure about how to deregister on Darwin. @@ -332,7 +332,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, InstallExceptionTableDeregister(__deregister_frame); #endif // __APPLE__ #endif // HAVE_EHTABLE_SUPPORT - + // Initialize passes. PM.doInitialization(); } @@ -365,11 +365,11 @@ void JIT::addModule(Module *M) { if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { report_fatal_error("Target does not support machine code emission!"); } - + // Initialize passes. PM.doInitialization(); } - + ExecutionEngine::addModule(M); } @@ -377,29 +377,29 @@ void JIT::addModule(Module *M) { /// since the PassManager it contains references a released Module. bool JIT::removeModule(Module *M) { bool result = ExecutionEngine::removeModule(M); - + MutexGuard locked(lock); - + if (jitstate->getModule() == M) { delete jitstate; jitstate = 0; } - + if (!jitstate && !Modules.empty()) { jitstate = new JITState(Modules[0]); FunctionPassManager &PM = jitstate->getPM(locked); PM.add(new TargetData(*TM.getTargetData())); - + // Turn the machine code intermediate representation into bytes in memory // that may be executed. if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { report_fatal_error("Target does not support machine code emission!"); } - + // Initialize passes. PM.doInitialization(); - } + } return result; } @@ -433,7 +433,7 @@ GenericValue JIT::runFunction(Function *F, // Call the function. GenericValue rv; - rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), (char **)GVTOP(ArgValues[1]), (const char **)GVTOP(ArgValues[2]))); return rv; @@ -446,7 +446,7 @@ GenericValue JIT::runFunction(Function *F, // Call the function. 
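These runFunction specializations all rely on one idiom: the code address produced by the JIT, held as a void*, is cast back to a concrete function-pointer type through intptr_t and called directly. A standalone sketch of that idiom, with an ordinary function standing in for JIT-compiled code:

#include <cstdint>
#include <cstdio>

static int fakeMain(int argc, char **argv) { (void)argv; return argc; }

int main() {
  void *FPtr = (void *)(intptr_t)&fakeMain;  // the address as the JIT hands it back
  int (*PF)(int, char **) = (int (*)(int, char **))(intptr_t)FPtr;
  char *Argv[] = {0};
  std::printf("%d\n", PF(3, Argv));          // prints 3
  return 0;
}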
GenericValue rv; - rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), (char **)GVTOP(ArgValues[1]))); return rv; } @@ -480,7 +480,7 @@ GenericValue JIT::runFunction(Function *F, rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); else if (BitWidth <= 64) rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); - else + else llvm_unreachable("Integer types > 64 bits not supported"); return rv; } @@ -542,7 +542,7 @@ GenericValue JIT::runFunction(Function *F, case Type::PointerTyID: void *ArgPtr = GVTOP(AV); if (sizeof(void*) == 4) - C = ConstantInt::get(Type::getInt32Ty(F->getContext()), + C = ConstantInt::get(Type::getInt32Ty(F->getContext()), (int)(intptr_t)ArgPtr); else C = ConstantInt::get(Type::getInt64Ty(F->getContext()), @@ -649,7 +649,7 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { "Externally-defined function should not be in pending list."); jitTheFunction(PF, locked); - + // Now that the function has been jitted, ask the JITEmitter to rewrite // the stub with real address of the function. updateFunctionStub(PF); @@ -666,7 +666,7 @@ void JIT::jitTheFunction(Function *F, const MutexGuard &locked) { } /// getPointerToFunction - This method is used to get the address of the -/// specified function, compiling it if neccesary. +/// specified function, compiling it if necessary. /// void *JIT::getPointerToFunction(Function *F) { @@ -703,7 +703,7 @@ void *JIT::getPointerToFunction(Function *F) { void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { MutexGuard locked(lock); - + BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap(locked).find(BB); if (I == getBasicBlockAddressMap(locked).end()) { @@ -724,7 +724,7 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { // resolve basic block address MutexGuard locked(lock); - + BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap(locked).find(BB); if (I != getBasicBlockAddressMap(locked).end()) { diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index 1d1763e..b576c16 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -42,7 +42,7 @@ public: FunctionPassManager &getPM(const MutexGuard &L) { return PM; } - + Module *getModule() const { return M; } std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){ return PendingFunctions; @@ -86,7 +86,7 @@ public: static void Register() { JITCtor = createJIT; } - + /// getJITInfo - Return the target JIT information structure. /// TargetJITInfo &getJITInfo() const { return TJI; } @@ -106,7 +106,7 @@ public: } virtual void addModule(Module *M); - + /// removeModule - Remove a Module from the list of modules. Returns true if /// M is found. virtual bool removeModule(Module *M); @@ -146,7 +146,7 @@ public: /// getPointerToBasicBlock - This returns the address of the specified basic /// block, assuming function is compiled. void *getPointerToBasicBlock(BasicBlock *BB); - + /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. @@ -172,7 +172,7 @@ public: void freeMachineCodeForFunction(Function *F); /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd - /// function was encountered. Add it to a pending list to be processed after + /// function was encountered. Add it to a pending list to be processed after /// the current function. 
/// void addPendingFunction(Function *F); diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index 3b5acb7..e71c20b 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" #include <string> -#include <vector> namespace llvm { @@ -143,7 +142,7 @@ void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) { // Add a mapping from F to the entry and buffer, so we can delete this // info later. - FnMap[F] = std::make_pair<std::string, jit_code_entry*>(Buffer, JITCodeEntry); + FnMap[F] = std::make_pair(Buffer, JITCodeEntry); // Acquire the lock and do the registration. { diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index f54ccca..ddb0d54 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -34,7 +34,7 @@ using namespace llvm; JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {} -unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, +unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, JITCodeEmitter& jce, unsigned char* StartFunction, unsigned char* EndFunction, @@ -47,10 +47,10 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, RI = TM.getRegisterInfo(); TFI = TM.getFrameLowering(); JCE = &jce; - + unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction, EndFunction); - + unsigned char* Result = 0; const std::vector<const Function *> Personalities = MMI->getPersonalities(); @@ -63,7 +63,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, } -void +void JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, const std::vector<MachineMove> &Moves) const { unsigned PointerSize = TD->getPointerSize(); @@ -74,26 +74,26 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, for (unsigned i = 0, N = Moves.size(); i < N; ++i) { const MachineMove &Move = Moves[i]; MCSymbol *Label = Move.getLabel(); - + // Throw out move if the label is invalid. if (Label && (*JCE->getLabelLocations())[Label] == 0) continue; - + intptr_t LabelPtr = 0; if (Label) LabelPtr = JCE->getLabelAddress(Label); const MachineLocation &Dst = Move.getDestination(); const MachineLocation &Src = Move.getSource(); - + // Advance row if new location. if (BaseLabelPtr && Label && BaseLabel != Label) { JCE->emitByte(dwarf::DW_CFA_advance_loc4); JCE->emitInt32(LabelPtr - BaseLabelPtr); - - BaseLabel = Label; + + BaseLabel = Label; BaseLabelPtr = LabelPtr; } - + // If advancing cfa. 
if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { if (!Src.isReg()) { @@ -103,7 +103,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, JCE->emitByte(dwarf::DW_CFA_def_cfa); JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true)); } - + JCE->emitULEB128Bytes(-Src.getOffset()); } else { llvm_unreachable("Machine move not supported yet."); @@ -119,7 +119,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, } else { unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true); int Offset = Dst.getOffset() / stackGrowth; - + if (Offset < 0) { JCE->emitByte(dwarf::DW_CFA_offset_extended_sf); JCE->emitULEB128Bytes(Reg); @@ -382,7 +382,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, unsigned TypeOffset = sizeof(int8_t) + // Call site format // Call-site table length - MCAsmInfo::getULEB128Size(SizeSites) + + MCAsmInfo::getULEB128Size(SizeSites) + SizeSites + SizeActions + SizeTypes; // Begin the exception table. @@ -452,7 +452,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, // Emit the type ids. for (unsigned M = TypeInfos.size(); M; --M) { const GlobalVariable *GV = TypeInfos[M - 1]; - + if (GV) { if (TD->getPointerSize() == sizeof(int32_t)) JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV)); @@ -484,7 +484,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { unsigned PointerSize = TD->getPointerSize(); int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? PointerSize : -PointerSize; - + unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue(); // EH Common Frame header JCE->allocateSpace(4, 0); @@ -499,13 +499,13 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { if (Personality) { // Augmentation Size: 3 small ULEBs of one byte each, and the personality // function which size is PointerSize. - JCE->emitULEB128Bytes(3 + PointerSize); - + JCE->emitULEB128Bytes(3 + PointerSize); + // We set the encoding of the personality as direct encoding because we use // the function pointer. The encoding is not relative because the current // PC value may be bigger than the personality function pointer. if (PointerSize == 4) { - JCE->emitByte(dwarf::DW_EH_PE_sdata4); + JCE->emitByte(dwarf::DW_EH_PE_sdata4); JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality))); } else { JCE->emitByte(dwarf::DW_EH_PE_sdata8); @@ -540,11 +540,11 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { unsigned char* JITDwarfEmitter::EmitEHFrame(const Function* Personality, unsigned char* StartCommonPtr, - unsigned char* StartFunction, + unsigned char* StartFunction, unsigned char* EndFunction, unsigned char* ExceptionTable) const { unsigned PointerSize = TD->getPointerSize(); - + // EH frame header. unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue(); JCE->allocateSpace(4, 0); @@ -558,7 +558,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, // specific data area in the exception table. if (Personality) { JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8); - + if (PointerSize == 4) { if (!MMI->getLandingPads().empty()) JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue()); @@ -573,7 +573,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, } else { JCE->emitULEB128Bytes(0); } - + // Indicate locations of function specific callee saved registers in // frame. 
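The emitter above leans on ULEB128 throughout (the emitULEB128Bytes calls) for register numbers and offsets. A standalone sketch of the encoding, assuming nothing beyond DWARF's 7-data-bits-per-byte rule:

#include <cstdint>
#include <cstdio>
#include <vector>

// Unsigned LEB128: 7 data bits per byte, high bit set on all but the last.
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

int main() {
  // Classic DWARF example: 624485 encodes as e5 8e 26.
  std::vector<uint8_t> Bytes = encodeULEB128(624485);
  for (size_t i = 0; i != Bytes.size(); ++i)
    std::printf("%02x ", Bytes[i]);
  std::printf("\n");
  return 0;
}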
EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves()); @@ -593,6 +593,6 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, JCE->emitInt32(0); JCE->emitInt32(0); } - + return StartEHPtr; } diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h index 9495697..e1d0045 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h @@ -35,33 +35,33 @@ class JITDwarfEmitter { MachineModuleInfo* MMI; JIT& Jit; bool stackGrowthDirection; - + unsigned char* EmitExceptionTable(MachineFunction* MF, - unsigned char* StartFunction, + unsigned char* StartFunction, unsigned char* EndFunction) const; - void EmitFrameMoves(intptr_t BaseLabelPtr, + void EmitFrameMoves(intptr_t BaseLabelPtr, const std::vector<MachineMove> &Moves) const; - + unsigned char* EmitCommonEHFrame(const Function* Personality) const; - unsigned char* EmitEHFrame(const Function* Personality, + unsigned char* EmitEHFrame(const Function* Personality, unsigned char* StartBufferPtr, - unsigned char* StartFunction, + unsigned char* StartFunction, unsigned char* EndFunction, unsigned char* ExceptionTable) const; - + public: - + JITDwarfEmitter(JIT& jit); - - unsigned char* EmitDwarfTable(MachineFunction& F, + + unsigned char* EmitDwarfTable(MachineFunction& F, JITCodeEmitter& JCE, unsigned char* StartFunction, unsigned char* EndFunction, unsigned char* &EHFramePtr); - - + + void setModuleInfo(MachineModuleInfo* Info) { MMI = Info; } diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 4cd8757..d046b8a 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -123,17 +123,18 @@ namespace { return FunctionToLazyStubMap; } - GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) { - assert(locked.holds(TheJIT->lock)); + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) { + assert(lck.holds(TheJIT->lock)); return GlobalToIndirectSymMap; } - pair<void *, Function *> LookupFunctionFromCallSite( + std::pair<void *, Function *> LookupFunctionFromCallSite( const MutexGuard &locked, void *CallSite) const { assert(locked.holds(TheJIT->lock)); - // The address given to us for the stub may not be exactly right, it might be - // a little bit after the stub. As such, use upper_bound to find it. + // The address given to us for the stub may not be exactly right, it + // might be a little bit after the stub. As such, use upper_bound to + // find it. CallSiteToFunctionMapTy::const_iterator I = CallSiteToFunctionMap.upper_bound(CallSite); assert(I != CallSiteToFunctionMap.begin() && @@ -645,7 +646,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. - pair<void*, Function*> I = + std::pair<void*, Function*> I = JR->state.LookupFunctionFromCallSite(locked, Stub); F = I.second; ActualPtr = I.first; @@ -659,7 +660,8 @@ void *JITResolver::JITCompilerFn(void *Stub) { // If lazy compilation is disabled, emit a useful error message and abort. 
if (!JR->TheJIT->isCompilingLazily()) { - report_fatal_error("LLVM JIT requested to do lazy compilation of function '" + report_fatal_error("LLVM JIT requested to do lazy compilation of" + " function '" + F->getName() + "' when lazy compiles are disabled!"); } @@ -745,7 +747,7 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (DL.isUnknown()) return; if (!BeforePrintingInsn) return; - + const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext(); if (DL.getScope(Context) != 0 && PrevDL != DL) { @@ -781,7 +783,7 @@ void JITEmitter::startFunction(MachineFunction &F) { uintptr_t ActualSize = 0; // Set the memory writable, if it's not already MemMgr->setMemoryWritable(); - + if (SizeEstimate > 0) { // SizeEstimate will be non-zero on reallocation attempts. ActualSize = SizeEstimate; @@ -859,7 +861,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } else if (MR.isBasicBlock()) { ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock()); } else if (MR.isConstantPoolIndex()) { - ResultPtr = (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); + ResultPtr = + (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); } else { assert(MR.isJumpTableIndex()); ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex()); @@ -985,7 +988,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { CurBufferPtr = SavedCurBufferPtr; if (JITExceptionHandling) { - TheJIT->RegisterTable(FrameRegister); + TheJIT->RegisterTable(F.getFunction(), FrameRegister); } if (JITEmitDebugInfo) { @@ -1033,8 +1036,9 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { EmittedFunctions.erase(Emitted); } - // TODO: Do we need to unregister exception handling information from libgcc - // here? + if(JITExceptionHandling) { + TheJIT->DeregisterTable(F); + } if (JITEmitDebugInfo) { DR->UnregisterFunction(F); @@ -1129,7 +1133,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty() || JumpTableBase == 0) return; - + switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: return; @@ -1138,11 +1142,11 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { // .word LBB123 assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) && "Cross JIT'ing?"); - + // For each jump table, map each target in the jump table to the address of // an emitted MachineBasicBlock. 
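The loop that follows rewrites each jump-table slot with the resolved address of its target block. A standalone model of that slot-filling step (illustrative block names and addresses, not the real data structures):

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

int main() {
  // Final addresses recorded for each emitted block (made-up values).
  std::map<std::string, intptr_t> BlockAddress;
  BlockAddress["LBB0_1"] = 0x1000;
  BlockAddress["LBB0_2"] = 0x1040;

  // One jump table whose entries name their target blocks.
  const char *Targets[3] = {"LBB0_2", "LBB0_1", "LBB0_2"};
  intptr_t Slots[3];
  for (int i = 0; i != 3; ++i)
    Slots[i] = BlockAddress[Targets[i]]; // store the resolved address in the slot

  std::printf("%lx %lx %lx\n", (long)Slots[0], (long)Slots[1], (long)Slots[2]);
  return 0;
}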
intptr_t *SlotPtr = (intptr_t*)JumpTableBase; - + for (unsigned i = 0, e = JT.size(); i != e; ++i) { const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; // Store the address of the basic block for this jump table slot in the @@ -1152,7 +1156,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { } break; } - + case MachineJumpTableInfo::EK_Custom32: case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: { diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp index 670fa7d..9a9ed6d 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -108,8 +108,8 @@ void OProfileJITEventListener::NotifyFunctionEmitted( if (op_write_native_code(Agent, F.getName().data(), reinterpret_cast<uint64_t>(FnStart), FnStart, FnSize) == -1) { - DEBUG(dbgs() << "Failed to tell OProfile about native function " - << F.getName() << " at [" + DEBUG(dbgs() << "Failed to tell OProfile about native function " + << F.getName() << " at [" << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); return; } @@ -153,9 +153,9 @@ void OProfileJITEventListener::NotifyFunctionEmitted( if (op_write_debug_line_info(Agent, FnStart, LineInfo.size(), &*LineInfo.begin()) == -1) { - DEBUG(dbgs() + DEBUG(dbgs() << "Failed to tell OProfile about line numbers for native function " - << F.getName() << " at [" + << F.getName() << " at [" << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); } } diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp index 6b7173c..8d92ab0 100644 --- a/lib/ExecutionEngine/JIT/TargetSelect.cpp +++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp @@ -84,7 +84,7 @@ TargetMachine *JIT::selectTarget(Module *Mod, } // Allocate a target... - TargetMachine *Target = + TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr); assert(Target && "Could not allocate target machine!"); return Target; diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index f7ed176..6553079 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMMCJIT MCJIT.cpp TargetSelect.cpp + Intercept.cpp ) diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp new file mode 100644 index 0000000..e431c84 --- /dev/null +++ b/lib/ExecutionEngine/MCJIT/Intercept.cpp @@ -0,0 +1,161 @@ +//===-- Intercept.cpp - System function interception routines -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// If a function call occurs to an external function, the JIT is designed to use +// the dynamic loader interface to find a function to call. This is useful for +// calling system calls and library functions that are not available in LLVM. +// Some system calls, however, need to be handled specially. For this reason, +// we intercept some of them here and use our own stubs to handle them. 
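The interception scheme this header comment describes boils down to a lookup-order rule: the JIT's own stubs win over whatever a process-image search would return. A standalone sketch of that precedence (illustrative names, not MCJIT's real tables):

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

static void jitExitStub(int Status) { (void)Status; } // stands in for jit_exit

// Intercepts win; anything else would fall through to a dlsym-style
// search of the process image (elided here).
static void *resolve(const std::string &Name) {
  static std::map<std::string, void *> Intercepts;
  if (Intercepts.empty())
    Intercepts["exit"] = (void *)(intptr_t)&jitExitStub;
  std::map<std::string, void *>::iterator It = Intercepts.find(Name);
  if (It != Intercepts.end())
    return It->second;
  return 0; // fall-through: process-image lookup would go here
}

int main() {
  std::printf("exit -> %p, puts -> %p\n", resolve("exit"), resolve("puts"));
  return 0;
}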
+// +//===----------------------------------------------------------------------===// + +#include "MCJIT.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Config/config.h" +using namespace llvm; + +// AtExitHandlers - List of functions to call when the program exits, +// registered with the atexit() library function. +static std::vector<void (*)()> AtExitHandlers; + +/// runAtExitHandlers - Run any functions registered by the program's +/// calls to atexit(3), which we intercept and store in +/// AtExitHandlers. +/// +static void runAtExitHandlers() { + while (!AtExitHandlers.empty()) { + void (*Fn)() = AtExitHandlers.back(); + AtExitHandlers.pop_back(); + Fn(); + } +} + +//===----------------------------------------------------------------------===// +// Function stubs that are invoked instead of certain library calls +//===----------------------------------------------------------------------===// + +// Force the following functions to be linked in to anything that uses the +// JIT. This is a hack designed to work around the all-too-clever Glibc +// strategy of making these functions work differently when inlined vs. when +// not inlined, and hiding their real definitions in a separate archive file +// that the dynamic linker can't see. For more info, search for +// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. +#if defined(__linux__) +#if defined(HAVE_SYS_STAT_H) +#include <sys/stat.h> +#endif +#include <fcntl.h> +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' + * available as an exported symbol, so we have to add it explicitly. + */ +namespace { +class StatSymbols { +public: + StatSymbols() { + sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); + sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); + sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); + sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); + sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); + sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); + sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); + sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); + sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); + } +}; +} +static StatSymbols initStatSymbols; +#endif // __linux__ + +// jit_exit - Used to intercept the "exit" library call. +static void jit_exit(int Status) { + runAtExitHandlers(); // Run atexit handlers... + exit(Status); +} + +// jit_atexit - Used to intercept the "atexit" library call. +static int jit_atexit(void (*Fn)()) { + AtExitHandlers.push_back(Fn); // Take note of atexit handler... + return 0; // Always successful +} + +static int jit_noop() { + return 0; +} + +//===----------------------------------------------------------------------===// +// +/// getPointerToNamedFunction - This method returns the address of the specified +/// function by using the dynamic loader interface. As such it is only useful +/// for resolving library symbols, not code generated symbols. 
+///
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+                                       bool AbortOnFailure) {
+  if (!isSymbolSearchingDisabled()) {
+    // Check to see if this is one of the functions we want to intercept. Note,
+    // we cast to intptr_t here to silence a -pedantic warning that complains
+    // about casting a function pointer to a normal pointer.
+    if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+    if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+    // We should not invoke parent's ctors/dtors from generated main()!
+    // On Mingw and Cygwin, the symbol __main is resolved to
+    // callee's (e.g. tools/lli) one, to invoke wrong duplicated ctors
+    // (and register wrong callee's dtors with atexit(3)).
+    // We expect ExecutionEngine::runStaticConstructorsDestructors()
+    // is called before ExecutionEngine::runFunctionAsMain() is called.
+    if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+    const char *NameStr = Name.c_str();
+    // If this is an asm specifier, skip the sentinel.
+    if (NameStr[0] == 1) ++NameStr;
+
+    // If it's an external function, look it up in the process image...
+    void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+    if (Ptr) return Ptr;
+
+    // If it wasn't found and if it starts with an underscore ('_') character,
+    // and has an asm specifier, try again without the underscore.
+    if (Name[0] == 1 && NameStr[0] == '_') {
+      Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+      if (Ptr) return Ptr;
+    }
+
+    // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+    // are references to hidden visibility symbols that dlsym cannot resolve.
+    // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+    if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+        memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+      // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+      // This mirrors logic in libSystemStubs.a.
+      std::string Prefix = std::string(Name.begin(), Name.end()-9);
+      if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+        return Ptr;
+      if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+        return Ptr;
+    }
+#endif
+  }
+
+  /// If a LazyFunctionCreator is installed, use it to get/create the function.
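The last step below hands still-unresolved names to an installed LazyFunctionCreator. A standalone model of that fallback (hypothetical callback, not the real ExecutionEngine member):

#include <cstdio>
#include <string>

typedef void *(*LazyFunctionCreatorFn)(const std::string &);

static int StubStorage; // stand-in for a lazily created function body
static void *makeStub(const std::string &Name) {
  std::printf("lazily creating '%s'\n", Name.c_str());
  return &StubStorage;
}

int main() {
  LazyFunctionCreatorFn LazyFunctionCreator = &makeStub; // installed callback
  void *RP = 0;
  if (LazyFunctionCreator)                 // same guard as the code below
    RP = LazyFunctionCreator("myhelper");
  std::printf("resolved to %p\n", RP);
  return 0;
}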
+ if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; +} diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index f1e9dab..3d4ee36 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -1,4 +1,4 @@ -//===-- JIT.cpp - MC-based Just-in-Time Compiler --------------------------===// +//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,10 +8,17 @@ //===----------------------------------------------------------------------===// #include "MCJIT.h" +#include "MCJITMemoryManager.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/MCJIT.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Target/TargetData.h" using namespace llvm; @@ -51,20 +58,47 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // If the target supports JIT code generation, create the JIT. if (TargetJITInfo *TJ = TM->getJITInfo()) - return new MCJIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode); + return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM), OptLevel, + GVsWithCode); if (ErrorStr) *ErrorStr = "target does not support JIT code generation"; return 0; } -MCJIT::MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, +MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, + RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel, bool AllocateGVsWithCode) - : ExecutionEngine(M) { + : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) { + + PM.add(new TargetData(*TM->getTargetData())); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) { + report_fatal_error("Target does not support MC emission!"); + } + + // Initialize passes. + // FIXME: When we support multiple modules, we'll want to move the code + // gen and finalization out of the constructor here and do it more + // on-demand as part of getPointerToFunction(). + PM.run(*M); + // Flush the output buffer so the SmallVector gets its data. + OS.flush(); + + // Load the object into the dynamic linker. + // FIXME: It would be nice to avoid making yet another copy. + MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(), + Buffer.size())); + if (Dyld.loadObject(MB)) + report_fatal_error(Dyld.getErrorString()); + // Resolve any relocations. 
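The constructor above runs the whole pipeline eagerly: codegen into a growable buffer, copy the buffer into a loadable image, load it, then fix up relocations before any symbol address is handed out. A standalone model of that flow (stand-in types, not the LLVM API):

#include <cstdio>
#include <string>
#include <vector>

struct ModelDyld {
  // Returns true on error, matching the Dyld.loadObject convention above.
  bool loadObject(const std::string &Image) { return Image.empty(); }
  void resolveRelocations() { /* patch call sites; see RuntimeDyld below */ }
};

int main() {
  std::vector<char> Buffer;                 // codegen would fill this
  Buffer.push_back('O'); Buffer.push_back('B'); Buffer.push_back('J');
  std::string Image(Buffer.begin(), Buffer.end()); // the extra copy noted above
  ModelDyld Dyld;
  if (Dyld.loadObject(Image))
    return 1;                               // report_fatal_error in the real code
  Dyld.resolveRelocations();
  std::printf("loaded %zu bytes\n", Image.size());
  return 0;
}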
+ Dyld.resolveRelocations(); } MCJIT::~MCJIT() { + delete MemMgr; } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { @@ -73,8 +107,15 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { } void *MCJIT::getPointerToFunction(Function *F) { - report_fatal_error("not yet implemented"); - return 0; + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { + bool AbortOnFailure = !F->hasExternalWeakLinkage(); + void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); + addGlobalMapping(F, Addr); + return Addr; + } + + Twine Name = TM->getMCAsmInfo()->getGlobalPrefix() + F->getName(); + return (void*)Dyld.getSymbolAddress(Name.str()); } void *MCJIT::recompileAndRelinkFunction(Function *F) { @@ -87,6 +128,102 @@ void MCJIT::freeMachineCodeForFunction(Function *F) { GenericValue MCJIT::runFunction(Function *F, const std::vector<GenericValue> &ArgValues) { - report_fatal_error("not yet implemented"); + assert(F && "Function *F was null at entry to run()"); + + void *FPtr = getPointerToFunction(F); + assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); + const FunctionType *FTy = F->getFunctionType(); + const Type *RetTy = FTy->getReturnType(); + + assert((FTy->getNumParams() == ArgValues.size() || + (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && + "Wrong number of arguments passed into function!"); + assert(FTy->getNumParams() == ArgValues.size() && + "This doesn't support passing arguments through varargs (yet)!"); + + // Handle some common cases first. These cases correspond to common `main' + // prototypes. + if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { + switch (ArgValues.size()) { + case 3: + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy() && + FTy->getParamType(2)->isPointerTy()) { + int (*PF)(int, char **, const char **) = + (int(*)(int, char **, const char **))(intptr_t)FPtr; + + // Call the function. + GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]), + (const char **)GVTOP(ArgValues[2]))); + return rv; + } + break; + case 2: + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy()) { + int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; + + // Call the function. + GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]))); + return rv; + } + break; + case 1: + if (FTy->getNumParams() == 1 && + FTy->getParamType(0)->isIntegerTy(32)) { + GenericValue rv; + int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); + return rv; + } + break; + } + } + + // Handle cases where no arguments are passed first. 
+ if (ArgValues.empty()) { + GenericValue rv; + switch (RetTy->getTypeID()) { + default: llvm_unreachable("Unknown return type for function call!"); + case Type::IntegerTyID: { + unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth(); + if (BitWidth == 1) + rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 8) + rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 16) + rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 32) + rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 64) + rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); + else + llvm_unreachable("Integer types > 64 bits not supported"); + return rv; + } + case Type::VoidTyID: + rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)()); + return rv; + case Type::FloatTyID: + rv.FloatVal = ((float(*)())(intptr_t)FPtr)(); + return rv; + case Type::DoubleTyID: + rv.DoubleVal = ((double(*)())(intptr_t)FPtr)(); + return rv; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + llvm_unreachable("long double not supported yet"); + return rv; + case Type::PointerTyID: + return PTOGV(((void*(*)())(intptr_t)FPtr)()); + } + } + + assert("Full-featured argument passing not supported yet!"); return GenericValue(); } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index cd1f989..1b50766 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -10,14 +10,37 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H #define LLVM_LIB_EXECUTIONENGINE_MCJIT_H +#include "llvm/PassManager.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { +// FIXME: This makes all kinds of horrible assumptions for the time being, +// like only having one module, not needing to worry about multi-threading, +// blah blah. Purely in get-it-up-and-limping mode for now. + class MCJIT : public ExecutionEngine { - MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, + MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji, + RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel, bool AllocateGVsWithCode); + + TargetMachine *TM; + MCContext *Ctx; + RTDyldMemoryManager *MemMgr; + + // FIXME: These may need moved to a separate 'jitstate' member like the + // non-MC JIT does for multithreading and such. Just keep them here for now. + PassManager PM; + Module *M; + // FIXME: This really doesn't belong here. + SmallVector<char, 4096> Buffer; // Working buffer into which we JIT. + raw_svector_ostream OS; + + RuntimeDyld Dyld; + public: ~MCJIT(); @@ -35,6 +58,16 @@ public: virtual GenericValue runFunction(Function *F, const std::vector<GenericValue> &ArgValues); + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the dlsym function call. As such it is only + /// useful for resolving library symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. 
+  ///
+  void *getPointerToNamedFunction(const std::string &Name,
+                                  bool AbortOnFailure = true);
 /// @}
 /// @name (Private) Registration Interfaces
 /// @{
diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
new file mode 100644
index 0000000..e3c6fda
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
@@ -0,0 +1,59 @@
+//===-- MCJITMemoryManager.h - Definition for the Memory Manager ---C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H
+
+#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include <assert.h>
+
+namespace llvm {
+
+// The MCJIT memory manager is a layer between the standard JITMemoryManager
+// and the RuntimeDyld interface that maps objects, by name, onto their
+// matching LLVM IR counterparts in the module(s) being compiled.
+class MCJITMemoryManager : public RTDyldMemoryManager {
+  JITMemoryManager *JMM;
+
+  // FIXME: Multiple modules.
+  Module *M;
+public:
+  MCJITMemoryManager(JITMemoryManager *jmm) : JMM(jmm) {}
+
+  // Allocate ActualSize bytes, or more, for the named function. Return
+  // a pointer to the allocated memory and update Size to reflect how much
+  // memory was actually allocated.
+  uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) {
+    // FIXME: This should really reference the MCAsmInfo to get the global
+    // prefix.
+    if (Name[0] == '_') ++Name;
+    Function *F = M->getFunction(Name);
+    assert(F && "No matching function in JIT IR Module!");
+    return JMM->startFunctionBody(F, Size);
+  }
+
+  // Mark the end of the function, including how much of the allocated
+  // memory was actually used.
+  void endFunctionBody(const char *Name, uint8_t *FunctionStart,
+                       uint8_t *FunctionEnd) {
+    // FIXME: This should really reference the MCAsmInfo to get the global
+    // prefix.
+    if (Name[0] == '_') ++Name;
+    Function *F = M->getFunction(Name);
+    assert(F && "No matching function in JIT IR Module!");
+    JMM->endFunctionBody(F, FunctionStart, FunctionEnd);
+  }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
index 1858d77..9a649a5 100644
--- a/lib/ExecutionEngine/Makefile
+++ b/lib/ExecutionEngine/Makefile
@@ -8,6 +8,6 @@
 ##===----------------------------------------------------------------------===##
 LEVEL = ../..
 LIBRARYNAME = LLVMExecutionEngine
-PARALLEL_DIRS = Interpreter JIT MCJIT
+PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld

 include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
new file mode 100644
index 0000000..9e53f87
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMRuntimeDyld
+  RuntimeDyld.cpp
+  )
diff --git a/lib/ExecutionEngine/RuntimeDyld/Makefile b/lib/ExecutionEngine/RuntimeDyld/Makefile
new file mode 100644
index 0000000..5d6f26d
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/RuntimeDyld/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMRuntimeDyld
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
new file mode 100644
index 0000000..065e5e3
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -0,0 +1,669 @@
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::object;
+
+// Empty out-of-line virtual destructor as the key function.
+RTDyldMemoryManager::~RTDyldMemoryManager() {}
+
+namespace llvm {
+class RuntimeDyldImpl {
+  unsigned CPUType;
+  unsigned CPUSubtype;
+
+  // The MemoryManager to load objects into.
+  RTDyldMemoryManager *MemMgr;
+
+  // FIXME: This all assumes we're dealing with external symbols for anything
+  // explicitly referenced. I.e., we can index by name and things
+  // will work out. In practice, this may not be the case, so we
+  // should find a way to effectively generalize.
+
+  // For each function, we have a MemoryBlock of its instruction data.
+  StringMap<sys::MemoryBlock> Functions;
+
+  // Master symbol table. As modules are loaded and external symbols are
+  // resolved, their addresses are stored here.
+  StringMap<uint8_t*> SymbolTable;
+
+  // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (the JIT re-compiled the function, e.g.),
+  // the relocations get re-resolved.
+  struct RelocationEntry {
+    std::string Target;   // Object this relocation is contained in.
+ uint64_t Offset; // Offset into the object for the relocation. + uint32_t Data; // Second word of the raw macho relocation entry. + int64_t Addend; // Addend encoded in the instruction itself, if any. + bool isResolved; // Has this relocation been resolved previously? + + RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend) + : Target(t), Offset(offset), Data(data), Addend(addend), + isResolved(false) {} + }; + typedef SmallVector<RelocationEntry, 4> RelocationList; + StringMap<RelocationList> Relocations; + + // FIXME: Also keep a map of all the relocations contained in an object. Use + // this to dynamically answer whether all of the relocations in it have + // been resolved or not. + + bool HasError; + std::string ErrorStr; + + // Set the error state and record an error string. + bool Error(const Twine &Msg) { + ErrorStr = Msg.str(); + HasError = true; + return true; + } + + void extractFunction(StringRef Name, uint8_t *StartAddress, + uint8_t *EndAddress); + bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, + unsigned Type, unsigned Size); + bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel, + unsigned Type, unsigned Size); + bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, + unsigned Type, unsigned Size); + + bool loadSegment32(const MachOObject *Obj, + const MachOObject::LoadCommandInfo *SegmentLCI, + const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); + bool loadSegment64(const MachOObject *Obj, + const MachOObject::LoadCommandInfo *SegmentLCI, + const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); + +public: + RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {} + + bool loadObject(MemoryBuffer *InputBuffer); + + void *getSymbolAddress(StringRef Name) { + // FIXME: Just look up as a function for now. Overly simple of course. + // Work in progress. + return SymbolTable.lookup(Name); + } + + void resolveRelocations(); + + void reassignSymbolAddress(StringRef Name, uint8_t *Addr); + + // Is the linker in an error state? + bool hasError() { return HasError; } + + // Mark the error condition as handled and continue. + void clearError() { HasError = false; } + + // Get the error message. + StringRef getErrorString() { return ErrorStr; } +}; + +void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress, + uint8_t *EndAddress) { + // Allocate memory for the function via the memory manager. + uintptr_t Size = EndAddress - StartAddress + 1; + uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), Size); + assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) && + "Memory manager failed to allocate enough memory!"); + // Copy the function payload into the memory block. + memcpy(Mem, StartAddress, EndAddress - StartAddress + 1); + MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size); + // Remember where we put it. + Functions[Name] = sys::MemoryBlock(Mem, Size); + // Default the assigned address for this symbol to wherever this + // allocated it. + SymbolTable[Name] = Mem; + DEBUG(dbgs() << " allocated to " << Mem << "\n"); +} + +bool RuntimeDyldImpl:: +resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel, + unsigned Type, unsigned Size) { + // This just dispatches to the proper target specific routine. 
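For the x86-64 cases handled below, a PC-relative fixup stores the target minus the fixup address plus a 4-byte effective-PC adjustment, written one byte at a time since the target address carries no alignment guarantee. A standalone worked example with made-up addresses:

#include <cstdint>
#include <cstdio>

static void applyPCRel32(uint8_t *FixupAddr, uintptr_t FixupVMA,
                         uintptr_t TargetVMA) {
  uintptr_t Value = TargetVMA - (FixupVMA + 4); // effective-PC adjustment
  for (unsigned i = 0; i != 4; ++i) {           // unaligned-safe byte store
    FixupAddr[i] = (uint8_t)Value;
    Value >>= 8;
  }
}

int main() {
  uint8_t Insn[4] = {0, 0, 0, 0};
  applyPCRel32(Insn, 0x1000, 0x1040);           // displacement 0x3c
  std::printf("%02x %02x %02x %02x\n", Insn[0], Insn[1], Insn[2], Insn[3]);
  return 0;
}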
+ switch (CPUType) { + default: assert(0 && "Unsupported CPU type!"); + case mach::CTM_x86_64: + return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value, + isPCRel, Type, Size); + case mach::CTM_ARM: + return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value, + isPCRel, Type, Size); + } + llvm_unreachable(""); +} + +bool RuntimeDyldImpl:: +resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, + bool isPCRel, unsigned Type, + unsigned Size) { + // If the relocation is PC-relative, the value to be encoded is the + // pointer difference. + if (isPCRel) + // FIXME: It seems this value needs to be adjusted by 4 for an effective PC + // address. Is that expected? Only for branches, perhaps? + Value -= Address + 4; + + switch(Type) { + default: + llvm_unreachable("Invalid relocation type!"); + case macho::RIT_X86_64_Unsigned: + case macho::RIT_X86_64_Branch: { + // Mask in the target value a byte at a time (we don't have an alignment + // guarantee for the target address, so this is safest). + uint8_t *p = (uint8_t*)Address; + for (unsigned i = 0; i < Size; ++i) { + *p++ = (uint8_t)Value; + Value >>= 8; + } + return false; + } + case macho::RIT_X86_64_Signed: + case macho::RIT_X86_64_GOTLoad: + case macho::RIT_X86_64_GOT: + case macho::RIT_X86_64_Subtractor: + case macho::RIT_X86_64_Signed1: + case macho::RIT_X86_64_Signed2: + case macho::RIT_X86_64_Signed4: + case macho::RIT_X86_64_TLV: + return Error("Relocation type not implemented yet!"); + } + return false; +} + +bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value, + bool isPCRel, unsigned Type, + unsigned Size) { + // If the relocation is PC-relative, the value to be encoded is the + // pointer difference. + if (isPCRel) { + Value -= Address; + // ARM PCRel relocations have an effective-PC offset of two instructions + // (four bytes in Thumb mode, eight bytes in ARM mode). + // FIXME: For now, assume ARM mode. + Value -= 8; + } + + switch(Type) { + default: + llvm_unreachable("Invalid relocation type!"); + case macho::RIT_Vanilla: { + // Mask in the target value a byte at a time (we don't have an alignment + // guarantee for the target address, so this is safest). + uint8_t *p = (uint8_t*)Address; + for (unsigned i = 0; i < Size; ++i) { + *p++ = (uint8_t)Value; + Value >>= 8; + } + break; + } + case macho::RIT_ARM_Branch24Bit: { + // Mask the value into the target address. We know instructions are + // 32-bit aligned, so we can do it all at once. + uint32_t *p = (uint32_t*)Address; + // The low two bits of the value are not encoded. + Value >>= 2; + // Mask the value to 24 bits. + Value &= 0xffffff; + // FIXME: If the destination is a Thumb function (and the instruction + // is a non-predicated BL instruction), we need to change it to a BLX + // instruction instead. + + // Insert the value into the instruction.
+ *p = (*p & ~0xffffff) | Value; + break; + } + case macho::RIT_ARM_ThumbBranch22Bit: + case macho::RIT_ARM_ThumbBranch32Bit: + case macho::RIT_ARM_Half: + case macho::RIT_ARM_HalfDifference: + case macho::RIT_Pair: + case macho::RIT_Difference: + case macho::RIT_ARM_LocalDifference: + case macho::RIT_ARM_PreboundLazyPointer: + return Error("Relocation type not implemented yet!"); + } + return false; +} + +bool RuntimeDyldImpl:: +loadSegment32(const MachOObject *Obj, + const MachOObject::LoadCommandInfo *SegmentLCI, + const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { + InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; + Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC); + if (!SegmentLC) + return Error("unable to load segment load command"); + + for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { + InMemoryStruct<macho::Section> Sect; + Obj->ReadSection(*SegmentLCI, SectNum, Sect); + if (!Sect) + return Error("unable to load section: '" + Twine(SectNum) + "'"); + + // FIXME: Improve check. + if (Sect->Flags != 0x80000400) + return Error("unsupported section type!"); + + // Addresses and names of symbols in the section. + typedef std::pair<uint64_t, StringRef> SymbolEntry; + SmallVector<SymbolEntry, 64> Symbols; + // Index of all the symbol names, whether defined in this section or not. + // Used when we're dealing with relocation entries. + SmallVector<StringRef, 64> SymbolNames; + for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { + InMemoryStruct<macho::SymbolTableEntry> STE; + Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); + if (!STE) + return Error("unable to read symbol: '" + Twine(i) + "'"); + if (STE->SectionIndex > SegmentLC->NumSections) + return Error("invalid section index for symbol: '" + Twine(i) + "'"); + // Get the symbol name. + StringRef Name = Obj->getStringAtIndex(STE->StringIndex); + SymbolNames.push_back(Name); + + // Just skip symbols not defined in this section. + if ((unsigned)STE->SectionIndex - 1 != SectNum) + continue; + + // FIXME: Check the symbol type and flags. + if (STE->Type != 0xF) // external, defined in this section. + return Error("unexpected symbol type!"); + // Flags == 0x8 marks a Thumb function for ARM, which is fine as it + // doesn't require any special handling here. + if (STE->Flags != 0x0 && STE->Flags != 0x8) + return Error("unexpected symbol type!"); + + // Remember the symbol. + Symbols.push_back(SymbolEntry(STE->Value, Name)); + + DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << + (Sect->Address + STE->Value) << "\n"); + } + // Sort the symbols by address, just in case they didn't come in that way. + array_pod_sort(Symbols.begin(), Symbols.end()); + + // Extract the function data. + uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset, + SegmentLC->FileSize).data(); + for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { + uint64_t StartOffset = Sect->Address + Symbols[i].first; + uint64_t EndOffset = Symbols[i + 1].first - 1; + DEBUG(dbgs() << "Extracting function: " << Symbols[i].second + << " from [" << StartOffset << ", " << EndOffset << "]\n"); + extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); + } + // We handle the last symbol separately, since its end address is + // calculated differently: there is no next symbol to reference.
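The slicing that loadSegment32() performs here (and loadSegment64() repeats below) comes down to sorting the defined symbols by address and treating each symbol's bytes as running to the start of the next one, with the last symbol running to the end of the section. A standalone sketch of that arithmetic, under the same assumption that every byte belongs to some function, written with half-open ranges instead of the inclusive end offsets used above:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main() {
  typedef std::pair<uint64_t, std::string> SymbolEntry;
  std::vector<SymbolEntry> Symbols;  // (address, name), possibly unsorted
  Symbols.push_back(SymbolEntry(0x40, "g"));
  Symbols.push_back(SymbolEntry(0x00, "f"));
  const uint64_t SectionSize = 0x70;

  // Sort by address, just as array_pod_sort() does above.
  std::sort(Symbols.begin(), Symbols.end());

  for (size_t i = 0; i != Symbols.size(); ++i) {
    uint64_t Start = Symbols[i].first;
    // Each function ends where the next one starts; the last one ends at
    // the end of the section.
    uint64_t End = (i + 1 != Symbols.size()) ? Symbols[i + 1].first
                                             : SectionSize;
    printf("%s: [%#llx, %#llx)\n", Symbols[i].second.c_str(),
           (unsigned long long)Start, (unsigned long long)End);
  }
  return 0;
}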
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first; + uint64_t EndOffset = Sect->Size - 1; + DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second + << " from [" << StartOffset << ", " << EndOffset << "]\n"); + extractFunction(Symbols[Symbols.size()-1].second, + Base + StartOffset, Base + EndOffset); + + // Now extract the relocation information for each function and process it. + for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { + InMemoryStruct<macho::RelocationEntry> RE; + Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); + if (RE->Word0 & macho::RF_Scattered) + return Error("NOT YET IMPLEMENTED: scattered relocations."); + // Word0 of the relocation is the offset into the section where the + // relocation should be applied. We need to translate that into an + // offset into a function since that's our atom. + uint32_t Offset = RE->Word0; + // Look for the function containing the address. This is used for JIT + // code, so the number of functions in the section is almost always going + // to be very small (usually just one), so until we have use cases + // where that's not true, just use a trivial linear search. + unsigned SymbolNum; + unsigned NumSymbols = Symbols.size(); + assert(NumSymbols > 0 && Symbols[0].first <= Offset && + "No symbol containing relocation!"); + for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) + if (Symbols[SymbolNum + 1].first > Offset) + break; + // Adjust the offset to be relative to the symbol. + Offset -= Symbols[SymbolNum].first; + // Get the name of the symbol containing the relocation. + StringRef TargetName = SymbolNames[SymbolNum]; + + bool isExtern = (RE->Word1 >> 27) & 1; + // Figure out the source symbol of the relocation. If isExtern is true, + // this relocation references the symbol table, otherwise it references + // a section in the same object, numbered from 1 through NumSections + // (SectionBases is [0, NumSections-1]). + // FIXME: Some targets (ARM) use internal relocations even for + // externally visible symbols, if the definition is in the same + // file as the reference. We need to convert those back to by-name + // references. We can resolve the address based on the section + // offset and see if we have a symbol at that address. If we do, + // use that; otherwise, puke. + if (!isExtern) + return Error("Internal relocations not supported."); + uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value + StringRef SourceName = SymbolNames[SourceNum]; + + // FIXME: Get the relocation addend from the target address. + + // Now store the relocation information. Associate it with the source + // symbol.
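(Before the entry is stored, a note on the lookup above: the linear scan is justified by the comment's observation that JIT'd sections rarely hold more than a handful of functions. If that assumption ever breaks, the same query over the address-sorted symbols is a std::upper_bound, as in this standalone sketch with made-up addresses:)

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Return the index of the symbol containing Offset: the last symbol whose
// start address is <= Offset, assuming Starts is sorted ascending.
static size_t containingSymbol(const std::vector<uint64_t> &Starts,
                               uint64_t Offset) {
  assert(!Starts.empty() && Starts.front() <= Offset &&
         "No symbol containing relocation!");
  // Find the first start strictly greater than Offset, then step back one.
  std::vector<uint64_t>::const_iterator I =
      std::upper_bound(Starts.begin(), Starts.end(), Offset);
  return (I - Starts.begin()) - 1;
}

int main() {
  std::vector<uint64_t> Starts;
  Starts.push_back(0x00);
  Starts.push_back(0x40);
  Starts.push_back(0x90);
  assert(containingSymbol(Starts, 0x3f) == 0);
  assert(containingSymbol(Starts, 0x40) == 1);
  assert(containingSymbol(Starts, 0x95) == 2);
  return 0;
}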
+ Relocations[SourceName].push_back(RelocationEntry(TargetName, + Offset, + RE->Word1, + 0 /*Addend*/)); + DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset + << " from '" << SourceName << "' (Word1: " + << format("0x%x", RE->Word1) << ")\n"); + } + } + return false; +} + + +bool RuntimeDyldImpl:: +loadSegment64(const MachOObject *Obj, + const MachOObject::LoadCommandInfo *SegmentLCI, + const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { + InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; + Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC); + if (!Segment64LC) + return Error("unable to load segment load command"); + + for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { + InMemoryStruct<macho::Section64> Sect; + Obj->ReadSection64(*SegmentLCI, SectNum, Sect); + if (!Sect) + return Error("unable to load section: '" + Twine(SectNum) + "'"); + + // FIXME: Improve check. + if (Sect->Flags != 0x80000400) + return Error("unsupported section type!"); + + // Addresses and names of symbols in the section. + typedef std::pair<uint64_t, StringRef> SymbolEntry; + SmallVector<SymbolEntry, 64> Symbols; + // Index of all the symbol names, whether defined in this section or not. + // Used when we're dealing with relocation entries. + SmallVector<StringRef, 64> SymbolNames; + for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { + InMemoryStruct<macho::Symbol64TableEntry> STE; + Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); + if (!STE) + return Error("unable to read symbol: '" + Twine(i) + "'"); + if (STE->SectionIndex > Segment64LC->NumSections) + return Error("invalid section index for symbol: '" + Twine(i) + "'"); + // Get the symbol name. + StringRef Name = Obj->getStringAtIndex(STE->StringIndex); + SymbolNames.push_back(Name); + + // Just skip symbols not defined in this section. + if ((unsigned)STE->SectionIndex - 1 != SectNum) + continue; + + // FIXME: Check the symbol type and flags. + if (STE->Type != 0xF) // external, defined in this section. + return Error("unexpected symbol type!"); + if (STE->Flags != 0x0) + return Error("unexpected symbol type!"); + + // Remember the symbol. + Symbols.push_back(SymbolEntry(STE->Value, Name)); + + DEBUG(dbgs() << "Function sym: '" << Name << "' @ " << + (Sect->Address + STE->Value) << "\n"); + } + // Sort the symbols by address, just in case they didn't come in that way. + array_pod_sort(Symbols.begin(), Symbols.end()); + + // Extract the function data. + uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset, + Segment64LC->FileSize).data(); + for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) { + uint64_t StartOffset = Sect->Address + Symbols[i].first; + uint64_t EndOffset = Symbols[i + 1].first - 1; + DEBUG(dbgs() << "Extracting function: " << Symbols[i].second + << " from [" << StartOffset << ", " << EndOffset << "]\n"); + extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset); + } + // We handle the last symbol separately, since its end address is + // calculated differently: there is no next symbol to reference. + uint64_t StartOffset = Symbols[Symbols.size() - 1].first; + uint64_t EndOffset = Sect->Size - 1; + DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second + << " from [" << StartOffset << ", " << EndOffset << "]\n"); + extractFunction(Symbols[Symbols.size()-1].second, + Base + StartOffset, Base + EndOffset); + + // Now extract the relocation information for each function and process it.
+ for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { + InMemoryStruct<macho::RelocationEntry> RE; + Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); + if (RE->Word0 & macho::RF_Scattered) + return Error("NOT YET IMPLEMENTED: scattered relocations."); + // Word0 of the relocation is the offset into the section where the + // relocation should be applied. We need to translate that into an + // offset into a function since that's our atom. + uint32_t Offset = RE->Word0; + // Look for the function containing the address. This is used for JIT + // code, so the number of functions in the section is almost always going + // to be very small (usually just one), so until we have use cases + // where that's not true, just use a trivial linear search. + unsigned SymbolNum; + unsigned NumSymbols = Symbols.size(); + assert(NumSymbols > 0 && Symbols[0].first <= Offset && + "No symbol containing relocation!"); + for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum) + if (Symbols[SymbolNum + 1].first > Offset) + break; + // Adjust the offset to be relative to the symbol. + Offset -= Symbols[SymbolNum].first; + // Get the name of the symbol containing the relocation. + StringRef TargetName = SymbolNames[SymbolNum]; + + bool isExtern = (RE->Word1 >> 27) & 1; + // Figure out the source symbol of the relocation. If isExtern is true, + // this relocation references the symbol table, otherwise it references + // a section in the same object, numbered from 1 through NumSections + // (SectionBases is [0, NumSections-1]). + if (!isExtern) + return Error("Internal relocations not supported."); + uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value + StringRef SourceName = SymbolNames[SourceNum]; + + // FIXME: Get the relocation addend from the target address. + + // Now store the relocation information. Associate it with the source + // symbol. + Relocations[SourceName].push_back(RelocationEntry(TargetName, + Offset, + RE->Word1, + 0 /*Addend*/)); + DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset + << " from '" << SourceName << "' (Word1: " + << format("0x%x", RE->Word1) << ")\n"); + } + } + return false; +} + +bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) { + // If the linker is in an error state, don't do anything. + if (hasError()) + return true; + // Load the Mach-O wrapper object. + std::string ErrorStr; + OwningPtr<MachOObject> Obj( + MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr)); + if (!Obj) + return Error("unable to load object: '" + ErrorStr + "'"); + + // Get the CPU type information from the header. + const macho::Header &Header = Obj->getHeader(); + + // FIXME: Error checking that the loaded object is compatible with + // the system we're running on. + CPUType = Header.CPUType; + CPUSubtype = Header.CPUSubtype; + + // Validate that the load commands match what we expect.
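Before the load-command validation below, it may help to see how a client is meant to drive these pieces once the public RuntimeDyld wrapper (defined at the end of this file) is in place. This is a sketch, not code from the patch: SimpleMemMgr stands in for a caller-provided RTDyldMemoryManager subclass whose startFunctionBody()/endFunctionBody() hand out writable, executable memory, and its implementation is elided.

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <stdint.h>
using namespace llvm;

// Link one in-memory Mach-O object and call its entry point. MemMgr is
// assumed to be a caller-implemented RTDyldMemoryManager (a hypothetical
// SimpleMemMgr or similar); Buf holds the raw object file bytes.
static void linkAndRun(MemoryBuffer *Buf, RTDyldMemoryManager *MemMgr) {
  RuntimeDyld Dyld(MemMgr);
  // loadObject() returns true on failure, per the Error() convention above.
  if (Dyld.loadObject(Buf)) {
    errs() << "RuntimeDyld: " << Dyld.getErrorString() << "\n";
    return;
  }
  // Apply every recorded relocation against the current symbol addresses.
  Dyld.resolveRelocations();
  // Look up the entry point by name (Mach-O prepends an underscore).
  if (void *Addr = Dyld.getSymbolAddress("_main")) {
    typedef int (*EntryFn)(void);
    EntryFn Entry = (EntryFn)(uintptr_t)Addr;
    Entry();
  }
}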
+ const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0, + *DysymtabLCI = 0; + for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { + const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i); + switch (LCI.Command.Type) { + case macho::LCT_Segment: + case macho::LCT_Segment64: + if (SegmentLCI) + return Error("unexpected input object (multiple segments)"); + SegmentLCI = &LCI; + break; + case macho::LCT_Symtab: + if (SymtabLCI) + return Error("unexpected input object (multiple symbol tables)"); + SymtabLCI = &LCI; + break; + case macho::LCT_Dysymtab: + if (DysymtabLCI) + return Error("unexpected input object (multiple dynamic symbol tables)"); + DysymtabLCI = &LCI; + break; + default: + return Error("unexpected input object (unexpected load command)"); + } + } + + if (!SymtabLCI) + return Error("no symbol table found in object"); + if (!SegmentLCI) + return Error("no segment found in object"); + + // Read and register the symbol table data. + InMemoryStruct<macho::SymtabLoadCommand> SymtabLC; + Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC); + if (!SymtabLC) + return Error("unable to load symbol table load command"); + Obj->RegisterStringTable(*SymtabLC); + + // Read the dynamic link-edit information, if present (not present in static + // objects). + if (DysymtabLCI) { + InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC; + Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC); + if (!DysymtabLC) + return Error("unable to load dynamic link-edit load command"); + + // FIXME: We don't support anything interesting yet. +// if (DysymtabLC->LocalSymbolsIndex != 0) +// return Error("NOT YET IMPLEMENTED: local symbol entries"); +// if (DysymtabLC->ExternalSymbolsIndex != 0) +// return Error("NOT YET IMPLEMENTED: non-external symbol entries"); +// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries) +// return Error("NOT YET IMPLEMENTED: undefined symbol entries"); + } + + // Load the segment load command. + if (SegmentLCI->Command.Type == macho::LCT_Segment) { + if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC)) + return true; + } else { + if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC)) + return true; + } + + return false; +} + +// Resolve the relocations for all symbols we currently know about. +void RuntimeDyldImpl::resolveRelocations() { + // Just iterate over the symbols in our symbol table and assign their + // addresses. + StringMap<uint8_t*>::iterator i = SymbolTable.begin(); + StringMap<uint8_t*>::iterator e = SymbolTable.end(); + for (;i != e; ++i) + reassignSymbolAddress(i->getKey(), i->getValue()); +} + +// Assign an address to a symbol name and resolve all the relocations +// associated with it. +void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) { + // Assign the address in our symbol table. + SymbolTable[Name] = Addr; + + RelocationList &Relocs = Relocations[Name]; + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + RelocationEntry &RE = Relocs[i]; + uint8_t *Target = SymbolTable[RE.Target] + RE.Offset; + bool isPCRel = (RE.Data >> 24) & 1; + unsigned Type = (RE.Data >> 28) & 0xf; + unsigned Size = 1 << ((RE.Data >> 25) & 3); + + DEBUG(dbgs() << "Resolving relocation at '" << RE.Target + << "' + " << RE.Offset << " (" << format("%p", Target) << ")" + << " from '" << Name << "' (" << format("%p", Addr) << ")" + << " (" << (isPCRel ?
"pcrel" : "absolute") + << ", type: " << Type << ", Size: " << Size << ").\n"); + + resolveRelocation(Target, Addr, isPCRel, Type, Size); + RE.isResolved = true; + } +} + +//===----------------------------------------------------------------------===// +// RuntimeDyld class implementation +RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) { + Dyld = new RuntimeDyldImpl(MM); +} + +RuntimeDyld::~RuntimeDyld() { + delete Dyld; +} + +bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) { + return Dyld->loadObject(InputBuffer); +} + +void *RuntimeDyld::getSymbolAddress(StringRef Name) { + return Dyld->getSymbolAddress(Name); +} + +void RuntimeDyld::resolveRelocations() { + Dyld->resolveRelocations(); +} + +void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) { + Dyld->reassignSymbolAddress(Name, Addr); +} + +StringRef RuntimeDyld::getErrorString() { + return Dyld->getErrorString(); +} + +} // end namespace llvm diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 5aa06ab..f372db2 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -505,6 +505,7 @@ static bool LinkGlobals(Module *Dest, const Module *Src, SGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. CopyGVAttributes(NewDGV, SGV); + NewDGV->setUnnamedAddr(SGV->hasUnnamedAddr()); // If the LLVM runtime renamed the global, but it is an externally visible // symbol, DGV must be an existing global with internal linkage. Rename diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index f1811a1..6aed059 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMMC MCCodeEmitter.cpp MCContext.cpp MCDisassembler.cpp + MCELF.cpp MCELFObjectTargetWriter.cpp MCELFStreamer.cpp MCExpr.cpp diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index ea1629d..23c6d4c 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -11,20 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "ELFObjectWriter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -38,39 +32,14 @@ #include <vector> using namespace llvm; -static unsigned GetType(const MCSymbolData &SD) { - uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift; - assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || - Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || - Type == ELF::STT_FILE || Type == ELF::STT_COMMON || - Type == ELF::STT_TLS); - return Type; -} - -static unsigned GetBinding(const MCSymbolData &SD) { - uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift; - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK); - return Binding; -} - -static void SetBinding(MCSymbolData &SD, unsigned Binding) { - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK); - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift); 
- SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift)); -} +bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { + const MCFixupKindInfo &FKI = + Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind); -static unsigned GetVisibility(MCSymbolData &SD) { - unsigned Visibility = - (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift; - assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || - Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); - return Visibility; + return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; } - -static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) { +bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) { switch (Variant) { default: return false; @@ -90,345 +59,6 @@ static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) { } } -static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { - const MCFixupKindInfo &FKI = - Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind); - - return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; -} - -namespace { - class ELFObjectWriter : public MCObjectWriter { - protected: - /*static bool isFixupKindX86RIPRel(unsigned Kind) { - return Kind == X86::reloc_riprel_4byte || - Kind == X86::reloc_riprel_4byte_movq_load; - }*/ - - - /// ELFSymbolData - Helper struct for containing some precomputed information - /// on symbols. - struct ELFSymbolData { - MCSymbolData *SymbolData; - uint64_t StringIndex; - uint32_t SectionIndex; - - // Support lexicographic sorting. - bool operator<(const ELFSymbolData &RHS) const { - if (GetType(*SymbolData) == ELF::STT_FILE) - return true; - if (GetType(*RHS.SymbolData) == ELF::STT_FILE) - return false; - return SymbolData->getSymbol().getName() < - RHS.SymbolData->getSymbol().getName(); - } - }; - - /// @name Relocation Data - /// @{ - - struct ELFRelocationEntry { - // Make these big enough for both 32-bit and 64-bit - uint64_t r_offset; - int Index; - unsigned Type; - const MCSymbol *Symbol; - uint64_t r_addend; - - ELFRelocationEntry() - : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {} - - ELFRelocationEntry(uint64_t RelocOffset, int Idx, - unsigned RelType, const MCSymbol *Sym, - uint64_t Addend) - : r_offset(RelocOffset), Index(Idx), Type(RelType), - Symbol(Sym), r_addend(Addend) {} - - // Support lexicographic sorting. - bool operator<(const ELFRelocationEntry &RE) const { - return RE.r_offset < r_offset; - } - }; - - /// The target specific ELF writer instance. - llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter; - - SmallPtrSet<const MCSymbol *, 16> UsedInReloc; - SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc; - DenseMap<const MCSymbol *, const MCSymbol *> Renames; - - llvm::DenseMap<const MCSectionData*, - std::vector<ELFRelocationEntry> > Relocations; - DenseMap<const MCSection*, uint64_t> SectionStringTableIndex; - - /// @} - /// @name Symbol Table Data - /// @{ - - SmallString<256> StringTable; - std::vector<ELFSymbolData> LocalSymbolData; - std::vector<ELFSymbolData> ExternalSymbolData; - std::vector<ELFSymbolData> UndefinedSymbolData; - - /// @} - - bool NeedsGOT; - - bool NeedsSymtabShndx; - - // This holds the symbol table index of the last local symbol. - unsigned LastLocalSymbolIndex; - // This holds the .strtab section index. - unsigned StringTableIndex; - // This holds the .symtab section index. 
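The helpers being deleted here, like the MCELF::GetBinding/SetBinding/GetVisibility calls that replace them, all manipulate one encoding: ELF packs a symbol's binding into the high nibble and its type into the low nibble of the single st_info byte, which is why WriteSymbol() below can build Info as (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift). A standalone sketch of that packing (it assumes ELF_STB_Shift == 4 and ELF_STT_Shift == 0, which that expression relies on):

#include <cassert>
#include <cstdint>

// ELF st_info packing: high nibble = binding, low nibble = type. The
// constants mirror the common ELF::STB_* / ELF::STT_* values.
enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2 };

static uint8_t makeInfo(uint8_t Binding, uint8_t Type) {
  return (uint8_t)((Binding << 4) | (Type & 0xf));
}
static uint8_t bindingOf(uint8_t Info) { return Info >> 4; }
static uint8_t typeOf(uint8_t Info) { return Info & 0xf; }

int main() {
  uint8_t Info = makeInfo(STB_GLOBAL, STT_FUNC);
  assert(Info == 0x12);
  assert(bindingOf(Info) == STB_GLOBAL && typeOf(Info) == STT_FUNC);
  return 0;
}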
- unsigned SymbolTableIndex; - - unsigned ShstrtabIndex; - - - const MCSymbol *SymbolToReloc(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F) const; - - // For arch-specific emission of explicit reloc symbol - virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - bool IsBSS) const { - return NULL; - } - - bool is64Bit() const { return TargetObjectWriter->is64Bit(); } - bool hasRelocationAddend() const { - return TargetObjectWriter->hasRelocationAddend(); - } - - public: - ELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &_OS, bool IsLittleEndian) - : MCObjectWriter(_OS, IsLittleEndian), - TargetObjectWriter(MOTW), - NeedsGOT(false), NeedsSymtabShndx(false){ - } - - virtual ~ELFObjectWriter(); - - void WriteWord(uint64_t W) { - if (is64Bit()) - Write64(W); - else - Write32(W); - } - - void StringLE16(char *buf, uint16_t Value) { - buf[0] = char(Value >> 0); - buf[1] = char(Value >> 8); - } - - void StringLE32(char *buf, uint32_t Value) { - StringLE16(buf, uint16_t(Value >> 0)); - StringLE16(buf + 2, uint16_t(Value >> 16)); - } - - void StringLE64(char *buf, uint64_t Value) { - StringLE32(buf, uint32_t(Value >> 0)); - StringLE32(buf + 4, uint32_t(Value >> 32)); - } - - void StringBE16(char *buf ,uint16_t Value) { - buf[0] = char(Value >> 8); - buf[1] = char(Value >> 0); - } - - void StringBE32(char *buf, uint32_t Value) { - StringBE16(buf, uint16_t(Value >> 16)); - StringBE16(buf + 2, uint16_t(Value >> 0)); - } - - void StringBE64(char *buf, uint64_t Value) { - StringBE32(buf, uint32_t(Value >> 32)); - StringBE32(buf + 4, uint32_t(Value >> 0)); - } - - void String8(MCDataFragment &F, uint8_t Value) { - char buf[1]; - buf[0] = Value; - F.getContents() += StringRef(buf, 1); - } - - void String16(MCDataFragment &F, uint16_t Value) { - char buf[2]; - if (isLittleEndian()) - StringLE16(buf, Value); - else - StringBE16(buf, Value); - F.getContents() += StringRef(buf, 2); - } - - void String32(MCDataFragment &F, uint32_t Value) { - char buf[4]; - if (isLittleEndian()) - StringLE32(buf, Value); - else - StringBE32(buf, Value); - F.getContents() += StringRef(buf, 4); - } - - void String64(MCDataFragment &F, uint64_t Value) { - char buf[8]; - if (isLittleEndian()) - StringLE64(buf, Value); - else - StringBE64(buf, Value); - F.getContents() += StringRef(buf, 8); - } - - virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections); - - /// Default e_flags = 0 - virtual void WriteEFlags() { Write32(0); } - - virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF, - uint64_t name, uint8_t info, - uint64_t value, uint64_t size, - uint8_t other, uint32_t shndx, - bool Reserved); - - virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF, - ELFSymbolData &MSD, - const MCAsmLayout &Layout); - - typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy; - virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap); - - virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue); - - virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm, - const MCSymbol *S); - - // Map from a group section to the signature symbol - typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy; - // Map from a signature symbol 
to the group section - typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy; - - /// ComputeSymbolTable - Compute the symbol table data - /// - /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. - virtual void ComputeSymbolTable(MCAssembler &Asm, - const SectionIndexMapTy &SectionIndexMap, - RevGroupMapTy RevGroupMap); - - virtual void ComputeIndexMap(MCAssembler &Asm, - SectionIndexMapTy &SectionIndexMap); - - virtual void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, - const MCSectionData &SD); - - virtual void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) { - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - WriteRelocation(Asm, Layout, *it); - } - } - - virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap); - - // Create the sections that show up in the symbol table. Currently - // those are the .note.GNU-stack section and the group sections. - virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, - GroupMapTy &GroupMap, - RevGroupMapTy &RevGroupMap); - - virtual void ExecutePostLayoutBinding(MCAssembler &Asm, - const MCAsmLayout &Layout); - - virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, - uint64_t Address, uint64_t Offset, - uint64_t Size, uint32_t Link, uint32_t Info, - uint64_t Alignment, uint64_t EntrySize); - - virtual void WriteRelocationsFragment(const MCAssembler &Asm, - MCDataFragment *F, - const MCSectionData *SD); - - virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); - virtual void WriteSection(MCAssembler &Asm, - const SectionIndexMapTy &SectionIndexMap, - uint32_t GroupSymbolIndex, - uint64_t Offset, uint64_t Size, uint64_t Alignment, - const MCSectionELF &Section); - - protected: - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend) = 0; - }; - - //===- X86ELFObjectWriter -------------------------------------------===// - - class X86ELFObjectWriter : public ELFObjectWriter { - public: - X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &_OS, - bool IsLittleEndian); - - virtual ~X86ELFObjectWriter(); - protected: - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend); - }; - - - //===- ARMELFObjectWriter -------------------------------------------===// - - class ARMELFObjectWriter : public ELFObjectWriter { - public: - // FIXME: MCAssembler can't yet return the Subtarget, - enum { DefaultEABIVersion = 0x05000000U }; - - ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &_OS, - bool IsLittleEndian); - - virtual ~ARMELFObjectWriter(); - - virtual void WriteEFlags(); - protected: - virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - bool IsBSS) const; - - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend); - }; - - //===- MBlazeELFObjectWriter -------------------------------------------===// - - class MBlazeELFObjectWriter : public ELFObjectWriter { - public: - MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &_OS, - bool IsLittleEndian); - - virtual ~MBlazeELFObjectWriter(); - protected: - virtual unsigned GetRelocType(const 
MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend); - }; -} - ELFObjectWriter::~ELFObjectWriter() {} @@ -533,7 +163,8 @@ void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF, } } -static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout) { +uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data, + const MCAsmLayout &Layout) { if (Data.isCommon() && Data.isExternal()) return Data.getCommonAlignment(); @@ -579,7 +210,7 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, // Aliases defined with .symvar copy the binding from the symbol they alias. // This is the first place we are able to copy this information. it->setExternal(SD.isExternal()); - SetBinding(*it, GetBinding(SD)); + MCELF::SetBinding(*it, MCELF::GetBinding(SD)); StringRef Rest = AliasName.substr(Pos); if (!Symbol.isUndefined() && !Rest.startswith("@@@")) @@ -605,9 +236,9 @@ void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF, bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() || Data.getSymbol().isVariable(); - uint8_t Binding = GetBinding(OrigData); - uint8_t Visibility = GetVisibility(OrigData); - uint8_t Type = GetType(Data); + uint8_t Binding = MCELF::GetBinding(OrigData); + uint8_t Visibility = MCELF::GetVisibility(OrigData); + uint8_t Type = MCELF::GetType(Data); uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift); uint8_t Other = Visibility; @@ -673,7 +304,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, (Data.getFlags() & ELF_STB_Weak)) && "External symbol requires STB_GLOBAL or STB_WEAK flag"); WriteSymbol(SymtabF, ShndxF, MSD, Layout); - if (GetBinding(Data) == ELF::STB_LOCAL) + if (MCELF::GetBinding(Data) == ELF::STB_LOCAL) LastLocalSymbolIndex++; } @@ -681,7 +312,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, ELFSymbolData &MSD = UndefinedSymbolData[i]; MCSymbolData &Data = *MSD.SymbolData; WriteSymbol(SymtabF, ShndxF, MSD, Layout); - if (GetBinding(Data) == ELF::STB_LOCAL) + if (MCELF::GetBinding(Data) == ELF::STB_LOCAL) LastLocalSymbolIndex++; } } @@ -798,7 +429,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, FixedValue = Value; unsigned Type = GetRelocType(Target, Fixup, IsPCRel, (RelocSymbol != 0), Addend); - + uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -816,8 +447,9 @@ ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm, return SD.getIndex(); } -static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, - bool Used, bool Renamed) { +bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm, + const MCSymbolData &Data, + bool Used, bool Renamed) { if (Data.getFlags() & ELF_Other_Weakref) return false; @@ -836,7 +468,7 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, if (Symbol.isVariable() && !A.isVariable() && A.isUndefined()) return false; - bool IsGlobal = GetBinding(Data) == ELF::STB_GLOBAL; + bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL; if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal) return false; @@ -849,8 +481,8 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, return true; } -static bool isLocal(const MCSymbolData &Data, bool isSignature, - bool isUsedInReloc) { +bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature, + bool isUsedInReloc) { if (Data.isExternal()) return false; @@ -868,7 +500,8 @@ static bool isLocal(const MCSymbolData &Data, bool 
isSignature, } void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm, - SectionIndexMapTy &SectionIndexMap) { + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap) { unsigned Index = 1; for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { @@ -883,31 +516,37 @@ void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm, ie = Asm.end(); it != ie; ++it) { const MCSectionELF &Section = static_cast<const MCSectionELF &>(it->getSection()); - if (Section.getType() == ELF::SHT_GROUP) + if (Section.getType() == ELF::SHT_GROUP || + Section.getType() == ELF::SHT_REL || + Section.getType() == ELF::SHT_RELA) continue; SectionIndexMap[&Section] = Index++; + const MCSectionELF *RelSection = RelMap.lookup(&Section); + if (RelSection) + SectionIndexMap[RelSection] = Index++; } } void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, const SectionIndexMapTy &SectionIndexMap, - RevGroupMapTy RevGroupMap) { + RevGroupMapTy RevGroupMap, + unsigned NumRegularSections) { // FIXME: Is this the correct place to do this? if (NeedsGOT) { llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_"; MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name); MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym); Data.setExternal(true); - SetBinding(Data, ELF::STB_GLOBAL); + MCELF::SetBinding(Data, ELF::STB_GLOBAL); } - // Build section lookup table. - int NumRegularSections = Asm.size(); - // Index 0 is always the empty string. StringMap<uint64_t> StringIndexMap; StringTable += '\x00'; + // FIXME: We could optimize suffixes in strtab in the same way we + // optimize them in shstrtab. + // Add the data for the symbols. for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), ie = Asm.symbol_end(); it != ie; ++it) { @@ -929,14 +568,14 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, // Undefined symbols are global, but this is the first place we // are able to set it. bool Local = isLocal(*it, isSignature, Used); - if (!Local && GetBinding(*it) == ELF::STB_LOCAL) { + if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) { MCSymbolData &SD = Asm.getSymbolData(RefSymbol); - SetBinding(*it, ELF::STB_GLOBAL); - SetBinding(SD, ELF::STB_GLOBAL); + MCELF::SetBinding(*it, ELF::STB_GLOBAL); + MCELF::SetBinding(SD, ELF::STB_GLOBAL); } if (RefSymbol.isUndefined() && !Used && WeakrefUsed) - SetBinding(*it, ELF::STB_WEAK); + MCELF::SetBinding(*it, ELF::STB_WEAK); if (it->isCommon()) { assert(!Local); @@ -1004,11 +643,16 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, UndefinedSymbolData[i].SymbolData->setIndex(Index++); } -void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, - const MCSectionData &SD) { - if (!Relocations[&SD].empty()) { +void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm, + MCAsmLayout &Layout, + RelMapTy &RelMap) { + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + if (Relocations[&SD].empty()) + continue; + MCContext &Ctx = Asm.getContext(); - const MCSectionELF *RelaSection; const MCSectionELF &Section = static_cast<const MCSectionELF&>(SD.getSection()); @@ -1022,17 +666,32 @@ void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, else EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel); - RelaSection = Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ? 
- ELF::SHT_RELA : ELF::SHT_REL, 0, - SectionKind::getReadOnly(), - EntrySize, ""); + const MCSectionELF *RelaSection = + Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ? + ELF::SHT_RELA : ELF::SHT_REL, 0, + SectionKind::getReadOnly(), + EntrySize, ""); + RelMap[&Section] = RelaSection; + Asm.getOrCreateSectionData(*RelaSection); + } +} +void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, + const RelMapTy &RelMap) { + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(SD.getSection()); + + const MCSectionELF *RelaSection = RelMap.lookup(&Section); + if (!RelaSection) + continue; MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection); RelaSD.setAlignment(is64Bit() ? 8 : 4); MCDataFragment *F = new MCDataFragment(&RelaSD); - - WriteRelocationsFragment(Asm, F, &SD); + WriteRelocationsFragment(Asm, F, &*it); } } @@ -1092,9 +751,28 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, } } +static int compareBySuffix(const void *a, const void *b) { + const MCSectionELF *secA = *static_cast<const MCSectionELF* const *>(a); + const MCSectionELF *secB = *static_cast<const MCSectionELF* const *>(b); + const StringRef &NameA = secA->getSectionName(); + const StringRef &NameB = secB->getSectionName(); + const unsigned sizeA = NameA.size(); + const unsigned sizeB = NameB.size(); + const unsigned len = std::min(sizeA, sizeB); + for (unsigned int i = 0; i < len; ++i) { + char ca = NameA[sizeA - i - 1]; + char cb = NameB[sizeB - i - 1]; + if (ca != cb) + return cb - ca; + } + + return sizeB - sizeA; +} + void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap) { + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap) { MCContext &Ctx = Asm.getContext(); MCDataFragment *F; @@ -1106,7 +784,6 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, SectionKind::getReadOnly()); MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection); ShstrtabSD.setAlignment(1); - ShstrtabIndex = Asm.size(); const MCSectionELF *SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, @@ -1114,7 +791,6 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, EntrySize, ""); MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection); SymtabSD.setAlignment(is64Bit() ? 
8 : 4); - SymbolTableIndex = Asm.size(); MCSectionData *SymtabShndxSD = NULL; @@ -1126,14 +802,17 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, SymtabShndxSD->setAlignment(4); } - const MCSection *StrtabSection; + const MCSectionELF *StrtabSection; StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0, SectionKind::getReadOnly()); MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection); StrtabSD.setAlignment(1); - StringTableIndex = Asm.size(); - WriteRelocations(Asm, Layout); + ComputeIndexMap(Asm, SectionIndexMap, RelMap); + + ShstrtabIndex = SectionIndexMap.lookup(ShstrtabSection); + SymbolTableIndex = SectionIndexMap.lookup(SymtabSection); + StringTableIndex = SectionIndexMap.lookup(StrtabSection); // Symbol table F = new MCDataFragment(&SymtabSD); @@ -1148,6 +827,15 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, F = new MCDataFragment(&ShstrtabSD); + std::vector<const MCSectionELF*> Sections; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(it->getSection()); + Sections.push_back(&Section); + } + array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix); + // Section header string table. // // The first entry of a string table holds a null character so skip @@ -1155,22 +843,20 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, uint64_t Index = 1; F->getContents() += '\x00'; - StringMap<uint64_t> SecStringMap; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionELF &Section = - static_cast<const MCSectionELF&>(it->getSection()); - // FIXME: We could merge suffixes like in .text and .rela.text. + for (unsigned int I = 0, E = Sections.size(); I != E; ++I) { + const MCSectionELF &Section = *Sections[I]; StringRef Name = Section.getSectionName(); - if (SecStringMap.count(Name)) { - SectionStringTableIndex[&Section] = SecStringMap[Name]; - continue; + if (I != 0) { + StringRef PreviousName = Sections[I - 1]->getSectionName(); + if (PreviousName.endswith(Name)) { + SectionStringTableIndex[&Section] = Index - Name.size() - 1; + continue; + } } // Remember the index into the string table so we can write it // into the sh_name field of the section header table. SectionStringTableIndex[&Section] = Index; - SecStringMap[Name] = Index; Index += Name.size() + 1; F->getContents() += Name; @@ -1181,7 +867,9 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, GroupMapTy &GroupMap, - RevGroupMapTy &RevGroupMap) { + RevGroupMapTy &RevGroupMap, + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap) { // Create the .note.GNU-stack section if needed. 
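A note on the suffix sharing introduced above: because the section list is sorted with compareBySuffix(), a name that is a suffix of its predecessor (".text" right after ".rela.text") never needs its own .shstrtab string; its sh_name offset can point into the tail of the predecessor's entry. A standalone sketch of the same trick, with hypothetical section names:

#include <cstdio>
#include <map>
#include <string>

int main() {
  // Already ordered as compareBySuffix() would order them, so any name
  // that is a suffix of another directly follows its superstring.
  const char *Sorted[] = { ".rela.text", ".text", ".strtab" };
  std::string Table(1, '\0');  // index 0 holds the empty string
  std::map<std::string, size_t> Offset;
  for (size_t i = 0; i != 3; ++i) {
    std::string Name = Sorted[i];
    if (i) {
      std::string Prev = Sorted[i - 1];
      if (Prev.size() >= Name.size() &&
          Prev.compare(Prev.size() - Name.size(), Name.size(), Name) == 0) {
        // Share the suffix: point into the previous entry's tail.
        Offset[Name] = Offset[Prev] + (Prev.size() - Name.size());
        continue;
      }
    }
    Offset[Name] = Table.size();
    Table += Name;
    Table += '\0';
  }
  for (std::map<std::string, size_t>::iterator I = Offset.begin(),
       E = Offset.end(); I != E; ++I)
    printf("%-10s -> offset %u\n", I->first.c_str(), (unsigned)I->second);
  // ".text" resolves inside ".rela.text"; the table stores it only once.
  return 0;
}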
MCContext &Ctx = Asm.getContext(); if (Asm.getNoExecStack()) { @@ -1212,11 +900,11 @@ void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, GroupMap[Group] = SignatureSymbol; } + ComputeIndexMap(Asm, SectionIndexMap, RelMap); + // Add sections to the groups - unsigned Index = 1; - unsigned NumGroups = RevGroupMap.size(); for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); - it != ie; ++it, ++Index) { + it != ie; ++it) { const MCSectionELF &Section = static_cast<const MCSectionELF&>(it->getSection()); if (!(Section.getFlags() & ELF::SHF_GROUP)) @@ -1225,7 +913,8 @@ void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, MCSectionData &Data = Asm.getOrCreateSectionData(*Group); // FIXME: we could use the previous fragment MCDataFragment *F = new MCDataFragment(&Data); - String32(*F, NumGroups + Index); + unsigned Index = SectionIndexMap.lookup(&Section); + String32(*F, Index); } } @@ -1304,12 +993,12 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, Alignment, Section.getEntrySize()); } -static bool IsELFMetaDataSection(const MCSectionData &SD) { +bool ELFObjectWriter::IsELFMetaDataSection(const MCSectionData &SD) { return SD.getOrdinal() == ~UINT32_C(0) && !SD.getSection().isVirtualSection(); } -static uint64_t DataSectionSize(const MCSectionData &SD) { +uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) { uint64_t Ret = 0; for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; ++i) { @@ -1320,26 +1009,112 @@ static uint64_t DataSectionSize(const MCSectionData &SD) { return Ret; } -static uint64_t GetSectionFileSize(const MCAsmLayout &Layout, - const MCSectionData &SD) { +uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout, + const MCSectionData &SD) { if (IsELFMetaDataSection(SD)) return DataSectionSize(SD); return Layout.getSectionFileSize(&SD); } -static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout, - const MCSectionData &SD) { +uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout, + const MCSectionData &SD) { if (IsELFMetaDataSection(SD)) return DataSectionSize(SD); return Layout.getSectionAddressSize(&SD); } -static void WriteDataSectionData(ELFObjectWriter *W, const MCSectionData &SD) { - for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; - ++i) { - const MCFragment &F = *i; - assert(F.getKind() == MCFragment::FT_Data); - W->WriteBytes(cast<MCDataFragment>(F).getContents().str()); +void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCSectionELF &Section) { + uint64_t FileOff = OS.tell(); + const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + + uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment()); + WriteZeros(Padding); + FileOff += Padding; + + FileOff += GetSectionFileSize(Layout, SD); + + if (IsELFMetaDataSection(SD)) { + for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; + ++i) { + const MCFragment &F = *i; + assert(F.getKind() == MCFragment::FT_Data); + WriteBytes(cast<MCDataFragment>(F).getContents().str()); + } + } else { + Asm.WriteSectionData(&SD, Layout); + } +} + +void ELFObjectWriter::WriteSectionHeader(MCAssembler &Asm, + const GroupMapTy &GroupMap, + const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap, + const SectionOffsetMapTy &SectionOffsetMap) { + const unsigned NumSections = Asm.size() + 1; + + std::vector<const MCSectionELF*> Sections; + Sections.resize(NumSections - 1); + + for (SectionIndexMapTy::const_iterator i= 
+ SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) { + const std::pair<const MCSectionELF*, uint32_t> &p = *i; + Sections[p.second - 1] = p.first; + } + + // Null section first. + uint64_t FirstSectionSize = + NumSections >= ELF::SHN_LORESERVE ? NumSections : 0; + uint32_t FirstSectionLink = + ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0; + WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0); + + for (unsigned i = 0; i < NumSections - 1; ++i) { + const MCSectionELF &Section = *Sections[i]; + const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + uint32_t GroupSymbolIndex; + if (Section.getType() != ELF::SHT_GROUP) + GroupSymbolIndex = 0; + else + GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, + GroupMap.lookup(&Section)); + + uint64_t Size = GetSectionAddressSize(Layout, SD); + + WriteSection(Asm, SectionIndexMap, GroupSymbolIndex, + SectionOffsetMap.lookup(&Section), Size, + SD.getAlignment(), Section); + } +} + +void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm, + std::vector<const MCSectionELF*> &Sections) { + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast<const MCSectionELF &>(it->getSection()); + if (Section.getType() == ELF::SHT_GROUP) + Sections.push_back(&Section); + } + + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast<const MCSectionELF &>(it->getSection()); + if (Section.getType() != ELF::SHT_GROUP && + Section.getType() != ELF::SHT_REL && + Section.getType() != ELF::SHT_RELA) + Sections.push_back(&Section); + } + + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast<const MCSectionELF &>(it->getSection()); + if (Section.getType() == ELF::SHT_REL || + Section.getType() == ELF::SHT_RELA) + Sections.push_back(&Section); } } @@ -1347,107 +1122,108 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) { GroupMapTy GroupMap; RevGroupMapTy RevGroupMap; - CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap, - RevGroupMap); - SectionIndexMapTy SectionIndexMap; - ComputeIndexMap(Asm, SectionIndexMap); + unsigned NumUserSections = Asm.size(); + + DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap; + CreateRelocationSections(Asm, const_cast<MCAsmLayout&>(Layout), RelMap); + + const unsigned NumUserAndRelocSections = Asm.size(); + CreateIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap, + RevGroupMap, SectionIndexMap, RelMap); + const unsigned AllSections = Asm.size(); + const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections; + + unsigned NumRegularSections = NumUserSections + NumIndexedSections; // Compute symbol table information. - ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap); + ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap, NumRegularSections); + + + WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap); CreateMetadataSections(const_cast<MCAssembler&>(Asm), const_cast<MCAsmLayout&>(Layout), - SectionIndexMap); - - // Update to include the metadata sections. - ComputeIndexMap(Asm, SectionIndexMap); + SectionIndexMap, + RelMap); - // Add 1 for the null section. - unsigned NumSections = Asm.size() + 1; uint64_t NaturalAlignment = is64Bit() ? 8 : 4; uint64_t HeaderSize = is64Bit() ? 
sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr); uint64_t FileOff = HeaderSize; std::vector<const MCSectionELF*> Sections; - Sections.resize(NumSections); - - for (SectionIndexMapTy::const_iterator i= - SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) { - const std::pair<const MCSectionELF*, uint32_t> &p = *i; - Sections[p.second] = p.first; - } - - for (unsigned i = 1; i < NumSections; ++i) { + ComputeSectionOrder(Asm, Sections); + unsigned NumSections = Sections.size(); + SectionOffsetMapTy SectionOffsetMap; + for (unsigned i = 0; i < NumRegularSections + 1; ++i) { const MCSectionELF &Section = *Sections[i]; const MCSectionData &SD = Asm.getOrCreateSectionData(Section); FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); + // Remember the offset into the file for this section. + SectionOffsetMap[&Section] = FileOff; + // Get the size of the section in the output file (including padding). FileOff += GetSectionFileSize(Layout, SD); } FileOff = RoundUpToAlignment(FileOff, NaturalAlignment); - // Write out the ELF header ... - WriteHeader(FileOff - HeaderSize, NumSections); - - FileOff = HeaderSize; + const unsigned SectionHeaderOffset = FileOff - HeaderSize; - // ... then all of the sections ... - DenseMap<const MCSection*, uint64_t> SectionOffsetMap; + uint64_t SectionHeaderEntrySize = is64Bit() ? + sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr); + FileOff += (NumSections + 1) * SectionHeaderEntrySize; - for (unsigned i = 1; i < NumSections; ++i) { + for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) { const MCSectionELF &Section = *Sections[i]; const MCSectionData &SD = Asm.getOrCreateSectionData(Section); - uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment()); - WriteZeros(Padding); - FileOff += Padding; + FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); // Remember the offset into the file for this section. SectionOffsetMap[&Section] = FileOff; + // Get the size of the section in the output file (including padding). FileOff += GetSectionFileSize(Layout, SD); - - if (IsELFMetaDataSection(SD)) - WriteDataSectionData(this, SD); - else - Asm.WriteSectionData(&SD, Layout); } + // Write out the ELF header ... + WriteHeader(SectionHeaderOffset, NumSections + 1); + + // ... then the regular sections ... + // (the "+ 1" accounts for .shstrtab) + for (unsigned i = 0; i < NumRegularSections + 1; ++i) + WriteDataSectionData(Asm, Layout, *Sections[i]); + + FileOff = OS.tell(); uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment); WriteZeros(Padding); - FileOff += Padding; - // ... and then the section header table. - // Should we align the section header table? - // - // Null section first. - uint64_t FirstSectionSize = - NumSections >= ELF::SHN_LORESERVE ? NumSections : 0; - uint32_t FirstSectionLink = - ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0; - WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0); + // ... then the section header table ... + WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap, + SectionOffsetMap); - for (unsigned i = 1; i < NumSections; ++i) { - const MCSectionELF &Section = *Sections[i]; - const MCSectionData &SD = Asm.getOrCreateSectionData(Section); - uint32_t GroupSymbolIndex; - if (Section.getType() != ELF::SHT_GROUP) - GroupSymbolIndex = 0; - else - GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, GroupMap[&Section]); + FileOff = OS.tell(); - uint64_t Size = GetSectionAddressSize(Layout, SD); + // ... and then the remaining sections ...
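The file-offset bookkeeping in the rewritten WriteObject() above follows one simple rule: the running offset is padded up to each section's alignment before the section's size is added. A standalone sketch of the arithmetic (RoundUpToAlignment() and OffsetToAlignment() behave like the helper below for power-of-two alignments; the sizes are made up):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Round Value up to the next multiple of a power-of-two alignment.
static uint64_t roundUp(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  uint64_t FileOff = 52;  // e.g. sizeof(Elf32_Ehdr)
  const uint64_t Align[] = { 16, 4, 8 };   // per-section alignments
  const uint64_t Size[] = { 37, 9, 100 };  // per-section file sizes
  for (unsigned i = 0; i != 3; ++i) {
    uint64_t Padding = roundUp(FileOff, Align[i]) - FileOff;
    FileOff += Padding;  // the section body starts here
    printf("section %u: offset %llu (padding %llu)\n", i,
           (unsigned long long)FileOff, (unsigned long long)Padding);
    FileOff += Size[i];  // account for the section body
  }
  assert(roundUp(52, 16) == 64);
  return 0;
}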
+ for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) + WriteDataSectionData(Asm, Layout, *Sections[i]); +} - WriteSection(Asm, SectionIndexMap, GroupSymbolIndex, - SectionOffsetMap[&Section], Size, - SD.getAlignment(), Section); - } +bool +ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbolData &DataA, + const MCFragment &FB, + bool InSet, + bool IsPCRel) const { + if (DataA.getFlags() & ELF_STB_Weak) + return false; + return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( + Asm, DataA, FB,InSet, IsPCRel); } MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, @@ -1700,13 +1476,17 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); + + case FK_Data_8: Type = ELF::R_X86_64_PC64; break; + case FK_Data_4: Type = ELF::R_X86_64_PC32; break; + case FK_Data_2: Type = ELF::R_X86_64_PC16; break; + case FK_PCRel_8: assert(Modifier == MCSymbolRefExpr::VK_None); Type = ELF::R_X86_64_PC64; break; case X86::reloc_signed_4byte: case X86::reloc_riprel_4byte_movq_load: - case FK_Data_4: // FIXME? case X86::reloc_riprel_4byte: case FK_PCRel_4: switch (Modifier) { diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h new file mode 100644 index 0000000..f1d514a --- /dev/null +++ b/lib/MC/ELFObjectWriter.h @@ -0,0 +1,406 @@ +//===- lib/MC/ELFObjectWriter.h - ELF File Writer -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF object file writer information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_ELFOBJECTWRITER_H +#define LLVM_MC_ELFOBJECTWRITER_H + +#include "MCELF.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbol.h" + +#include <vector> + +namespace llvm { + +class MCSection; +class MCDataFragment; +class MCSectionELF; + +class ELFObjectWriter : public MCObjectWriter { + protected: + + static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind); + static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant); + static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout); + static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, + bool Used, bool Renamed); + static bool isLocal(const MCSymbolData &Data, bool isSignature, + bool isUsedInReloc); + static bool IsELFMetaDataSection(const MCSectionData &SD); + static uint64_t DataSectionSize(const MCSectionData &SD); + static uint64_t GetSectionFileSize(const MCAsmLayout &Layout, + const MCSectionData &SD); + static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout, + const MCSectionData &SD); + + void WriteDataSectionData(MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCSectionELF &Section); + + /*static bool isFixupKindX86RIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; + }*/ + + /// ELFSymbolData - Helper struct for containing some precomputed + /// information on symbols. + struct ELFSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint32_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const ELFSymbolData &RHS) const { + if (MCELF::GetType(*SymbolData) == ELF::STT_FILE) + return true; + if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE) + return false; + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); + } + }; + + /// @name Relocation Data + /// @{ + + struct ELFRelocationEntry { + // Make these big enough for both 32-bit and 64-bit + uint64_t r_offset; + int Index; + unsigned Type; + const MCSymbol *Symbol; + uint64_t r_addend; + + ELFRelocationEntry() + : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {} + + ELFRelocationEntry(uint64_t RelocOffset, int Idx, + unsigned RelType, const MCSymbol *Sym, + uint64_t Addend) + : r_offset(RelocOffset), Index(Idx), Type(RelType), + Symbol(Sym), r_addend(Addend) {} + + // Support lexicographic sorting. + bool operator<(const ELFRelocationEntry &RE) const { + return RE.r_offset < r_offset; + } + }; + + /// The target specific ELF writer instance. 
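The ELFSymbolData comparator above orders the local symbol list so that STT_FILE entries come first and everything else sorts by name (it relies on there being at most one STT_FILE symbol per object). A simplified, self-contained version of the same ordering, with hypothetical types:

#include <algorithm>
#include <string>
#include <vector>

struct Sym { bool IsFileSym; std::string Name; };

static bool symLess(const Sym &A, const Sym &B) {
  if (A.IsFileSym != B.IsFileSym)
    return A.IsFileSym;   // STT_FILE entries sort ahead of everything else
  return A.Name < B.Name; // then lexicographic by symbol name
}

static void sortLocalSymbols(std::vector<Sym> &Syms) {
  std::sort(Syms.begin(), Syms.end(), symLess);
}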
+ llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter; + + SmallPtrSet<const MCSymbol *, 16> UsedInReloc; + SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc; + DenseMap<const MCSymbol *, const MCSymbol *> Renames; + + llvm::DenseMap<const MCSectionData*, + std::vector<ELFRelocationEntry> > Relocations; + DenseMap<const MCSection*, uint64_t> SectionStringTableIndex; + + /// @} + /// @name Symbol Table Data + /// @{ + + SmallString<256> StringTable; + std::vector<ELFSymbolData> LocalSymbolData; + std::vector<ELFSymbolData> ExternalSymbolData; + std::vector<ELFSymbolData> UndefinedSymbolData; + + /// @} + + bool NeedsGOT; + + bool NeedsSymtabShndx; + + // This holds the symbol table index of the last local symbol. + unsigned LastLocalSymbolIndex; + // This holds the .strtab section index. + unsigned StringTableIndex; + // This holds the .symtab section index. + unsigned SymbolTableIndex; + + unsigned ShstrtabIndex; + + + const MCSymbol *SymbolToReloc(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F) const; + + // For arch-specific emission of explicit reloc symbol + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + bool IsBSS) const { + return NULL; + } + + bool is64Bit() const { return TargetObjectWriter->is64Bit(); } + bool hasRelocationAddend() const { + return TargetObjectWriter->hasRelocationAddend(); + } + + public: + ELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, bool IsLittleEndian) + : MCObjectWriter(_OS, IsLittleEndian), + TargetObjectWriter(MOTW), + NeedsGOT(false), NeedsSymtabShndx(false){ + } + + virtual ~ELFObjectWriter(); + + void WriteWord(uint64_t W) { + if (is64Bit()) + Write64(W); + else + Write32(W); + } + + void StringLE16(char *buf, uint16_t Value) { + buf[0] = char(Value >> 0); + buf[1] = char(Value >> 8); + } + + void StringLE32(char *buf, uint32_t Value) { + StringLE16(buf, uint16_t(Value >> 0)); + StringLE16(buf + 2, uint16_t(Value >> 16)); + } + + void StringLE64(char *buf, uint64_t Value) { + StringLE32(buf, uint32_t(Value >> 0)); + StringLE32(buf + 4, uint32_t(Value >> 32)); + } + + void StringBE16(char *buf ,uint16_t Value) { + buf[0] = char(Value >> 8); + buf[1] = char(Value >> 0); + } + + void StringBE32(char *buf, uint32_t Value) { + StringBE16(buf, uint16_t(Value >> 16)); + StringBE16(buf + 2, uint16_t(Value >> 0)); + } + + void StringBE64(char *buf, uint64_t Value) { + StringBE32(buf, uint32_t(Value >> 32)); + StringBE32(buf + 4, uint32_t(Value >> 0)); + } + + void String8(MCDataFragment &F, uint8_t Value) { + char buf[1]; + buf[0] = Value; + F.getContents() += StringRef(buf, 1); + } + + void String16(MCDataFragment &F, uint16_t Value) { + char buf[2]; + if (isLittleEndian()) + StringLE16(buf, Value); + else + StringBE16(buf, Value); + F.getContents() += StringRef(buf, 2); + } + + void String32(MCDataFragment &F, uint32_t Value) { + char buf[4]; + if (isLittleEndian()) + StringLE32(buf, Value); + else + StringBE32(buf, Value); + F.getContents() += StringRef(buf, 4); + } + + void String64(MCDataFragment &F, uint64_t Value) { + char buf[8]; + if (isLittleEndian()) + StringLE64(buf, Value); + else + StringBE64(buf, Value); + F.getContents() += StringRef(buf, 8); + } + + virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections); + + /// Default e_flags = 0 + virtual void WriteEFlags() { Write32(0); } + + virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF, + uint64_t name, uint8_t info, + uint64_t 
value, uint64_t size, + uint8_t other, uint32_t shndx, + bool Reserved); + + virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF, + ELFSymbolData &MSD, + const MCAsmLayout &Layout); + + typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy; + virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap); + + virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue); + + virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm, + const MCSymbol *S); + + // Map from a group section to the signature symbol + typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy; + // Map from a signature symbol to the group section + typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy; + // Map from a section to the section with the relocations + typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy; + // Map from a section to its offset + typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy; + + /// ComputeSymbolTable - Compute the symbol table data + /// + /// \param StringTable [out] - The string table data. + /// \param StringIndexMap [out] - Map from symbol names to offsets in the + /// string table. + virtual void ComputeSymbolTable(MCAssembler &Asm, + const SectionIndexMapTy &SectionIndexMap, + RevGroupMapTy RevGroupMap, + unsigned NumRegularSections); + + virtual void ComputeIndexMap(MCAssembler &Asm, + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap); + + void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout, + RelMapTy &RelMap); + + void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, + const RelMapTy &RelMap); + + virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap); + + // Create the sections that show up in the symbol table. Currently + // those are the .note.GNU-stack section and the group sections. 
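CreateRelocationSections, declared above, gives every section that carries fixups a companion relocation section; whether that is a .rel or a .rela section follows from hasRelocationAddend(). A name-building sketch under that assumption (simplified; the real code also sets the section's type, flags and entry size):

#include <string>

// ".text" -> ".rela.text" on RELA targets (e.g. x86-64) and ".rel.text" on
// REL targets (e.g. classic 32-bit ARM).
static std::string relocSectionName(const std::string &SectionName,
                                    bool HasRelocationAddend) {
  return (HasRelocationAddend ? ".rela" : ".rel") + SectionName;
}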
+ virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, + GroupMapTy &GroupMap, + RevGroupMapTy &RevGroupMap, + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap); + + virtual void ExecutePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout); + + void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap, + const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap, + const SectionOffsetMapTy &SectionOffsetMap); + + void ComputeSectionOrder(MCAssembler &Asm, + std::vector<const MCSectionELF*> &Sections); + + virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, + uint64_t Address, uint64_t Offset, + uint64_t Size, uint32_t Link, uint32_t Info, + uint64_t Alignment, uint64_t EntrySize); + + virtual void WriteRelocationsFragment(const MCAssembler &Asm, + MCDataFragment *F, + const MCSectionData *SD); + + virtual bool + IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbolData &DataA, + const MCFragment &FB, + bool InSet, + bool IsPCRel) const; + + virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); + virtual void WriteSection(MCAssembler &Asm, + const SectionIndexMapTy &SectionIndexMap, + uint32_t GroupSymbolIndex, + uint64_t Offset, uint64_t Size, uint64_t Alignment, + const MCSectionELF &Section); + + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) = 0; + }; + + //===- X86ELFObjectWriter -------------------------------------------===// + + class X86ELFObjectWriter : public ELFObjectWriter { + public: + X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~X86ELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; + + + //===- ARMELFObjectWriter -------------------------------------------===// + + class ARMELFObjectWriter : public ELFObjectWriter { + public: + // FIXME: MCAssembler can't yet return the Subtarget, + enum { DefaultEABIVersion = 0x05000000U }; + + ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~ARMELFObjectWriter(); + + virtual void WriteEFlags(); + protected: + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + bool IsBSS) const; + + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; + + //===- MBlazeELFObjectWriter -------------------------------------------===// + + class MBlazeELFObjectWriter : public ELFObjectWriter { + public: + MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~MBlazeELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; +} + +#endif diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 8199fb2..541dd08 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -13,7 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/Dwarf.h" #include <cctype> 
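The three subclasses above each contribute only a relocation-type mapping; layout, symbol tables and section headers all stay in the shared base class. A sketch of what such an override looks like for a made-up target, assuming the ELFObjectWriter declarations from this header are visible (the relocation numbers are invented):

class ToyELFObjectWriter : public ELFObjectWriter {
public:
  ToyELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &_OS,
                     bool IsLittleEndian)
    : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {}
protected:
  virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                                bool IsPCRel, bool IsRelocWithSymbol,
                                int64_t Addend) {
    switch ((unsigned)Fixup.getKind()) {
    case FK_Data_4:                        // generic 32-bit data fixup
      return IsPCRel ? 2 /*R_TOY_PC32*/ : 1 /*R_TOY_32*/;
    default:
      llvm_unreachable("invalid fixup kind!");
    }
  }
};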
#include <cstring> using namespace llvm; @@ -26,7 +30,7 @@ MCAsmInfo::MCAsmInfo() { LinkerRequiresNonEmptyDwarfLines = false; MaxInstLength = 4; PCSymbol = "$"; - SeparatorChar = ';'; + SeparatorString = ";"; CommentColumn = 40; CommentString = "#"; LabelSuffix = ":"; @@ -106,3 +110,25 @@ unsigned MCAsmInfo::getSLEB128Size(int Value) { } while (IsMore); return Size; } + +const MCExpr * +MCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + return getExprForFDESymbol(Sym, Encoding, Streamer); +} + +const MCExpr * +MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + if (!(Encoding & dwarf::DW_EH_PE_pcrel)) + return MCSymbolRefExpr::Create(Sym, Streamer.getContext()); + + MCContext &Context = Streamer.getContext(); + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context); + MCSymbol *PCSym = Context.CreateTempSymbol(); + Streamer.EmitLabel(PCSym); + const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); + return MCBinaryExpr::CreateSub(Res, PC, Context); +} diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 526ad0d..4dd1d44 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -13,6 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoDarwin.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; MCAsmInfoDarwin::MCAsmInfoDarwin() { @@ -56,4 +59,3 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { DwarfUsesAbsoluteLabelForStmtList = false; DwarfUsesLabelOffsetForRanges = false; } - diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 8d06982..9717c01 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -45,20 +45,25 @@ class MCAsmStreamer : public MCStreamer { unsigned IsVerboseAsm : 1; unsigned ShowInst : 1; unsigned UseLoc : 1; + unsigned UseCFI : 1; + + enum EHSymbolFlags { EHGlobal = 1, + EHWeakDefinition = 1 << 1, + EHPrivateExtern = 1 << 2 }; + DenseMap<const MCSymbol*, unsigned> FlagMap; bool needsSet(const MCExpr *Value); public: MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, - bool isVerboseAsm, - bool useLoc, + bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *printer, MCCodeEmitter *emitter, TargetAsmBackend *asmbackend, bool showInst) : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend), CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm), - ShowInst(showInst), UseLoc(useLoc) { + ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI) { if (InstPrinter && IsVerboseAsm) InstPrinter->setCommentStream(CommentStream); } @@ -118,7 +123,8 @@ public: } virtual void EmitLabel(MCSymbol *Symbol); - + virtual void EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); virtual void EmitThumbFunc(MCSymbol *Func); @@ -127,6 +133,8 @@ public: virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, const MCSymbol *Label); + virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, + const MCSymbol *Label); virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); @@ -154,13 +162,13 @@ public: virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace); + unsigned AddrSpace); 
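When the encoding has DW_EH_PE_pcrel set, getExprForFDESymbol above rewrites a symbol reference into the difference 'Sym - here', planting a fresh temporary label at the emission point to stand for 'here'; with a pcrel sdata4 encoding the streamer thus ends up emitting the 4-byte equivalent of 'Ltmp: .long _foo-Ltmp' (label and symbol names invented for illustration).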
virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace = 0); - virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + virtual void EmitULEB128Value(const MCExpr *Value); - virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + virtual void EmitSLEB128Value(const MCExpr *Value); virtual void EmitGPRel32Value(const MCExpr *Value); @@ -182,15 +190,32 @@ public: virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator); + unsigned Isa, unsigned Discriminator, + StringRef FileName); + + virtual void EmitCFIStartProc(); + virtual void EmitCFIEndProc(); + virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset); + virtual void EmitCFIDefCfaOffset(int64_t Offset); + virtual void EmitCFIDefCfaRegister(int64_t Register); + virtual void EmitCFIOffset(int64_t Register, int64_t Offset); + virtual void EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding); + virtual void EmitCFILsda(const MCSymbol *Sym, unsigned Encoding); + virtual void EmitCFIRememberState(); + virtual void EmitCFIRestoreState(); + virtual void EmitCFISameValue(int64_t Register); + virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset); + virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment); + + virtual void EmitFnStart(); + virtual void EmitFnEnd(); + virtual void EmitCantUnwind(); + virtual void EmitPersonality(const MCSymbol *Personality); + virtual void EmitHandlerData(); + virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0); + virtual void EmitPad(int64_t Offset); + virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool); - virtual bool EmitCFIStartProc(); - virtual bool EmitCFIEndProc(); - virtual bool EmitCFIDefCfaOffset(int64_t Offset); - virtual bool EmitCFIDefCfaRegister(int64_t Register); - virtual bool EmitCFIOffset(int64_t Register, int64_t Offset); - virtual bool EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding); - virtual bool EmitCFILsda(const MCSymbol *Sym, unsigned Encoding); virtual void EmitInstruction(const MCInst &Inst); @@ -259,14 +284,27 @@ void MCAsmStreamer::ChangeSection(const MCSection *Section) { Section->PrintSwitchToSection(MAI, OS); } +void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol) { + if (UseCFI) + return; + + unsigned Flags = FlagMap.lookup(Symbol); + + if (Flags & EHGlobal) + EmitSymbolAttribute(EHSymbol, MCSA_Global); + if (Flags & EHWeakDefinition) + EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition); + if (Flags & EHPrivateExtern) + EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern); +} + void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); + MCStreamer::EmitLabel(Symbol); OS << *Symbol << MAI.getLabelSuffix(); EmitEOL(); - Symbol->setSection(*getCurrentSection()); } void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -309,6 +347,15 @@ void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, getContext().getTargetAsmInfo().getPointerSize()); } +void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, + const MCSymbol *Label) { + EmitIntValue(dwarf::DW_CFA_advance_loc4, 1); + const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, 
LastLabel); + AddrDelta = ForceExpAbs(this, getContext(), AddrDelta); + EmitValue(AddrDelta, 4); +} + + void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { switch (Attribute) { @@ -337,6 +384,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, return; case MCSA_Global: // .globl/.global OS << MAI.getGlobalDirective(); + FlagMap[Symbol] |= EHGlobal; break; case MCSA_Hidden: OS << "\t.hidden\t"; break; case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; @@ -345,11 +393,17 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_Local: OS << "\t.local\t"; break; case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break; case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break; - case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break; + case MCSA_PrivateExtern: + OS << "\t.private_extern\t"; + FlagMap[Symbol] |= EHPrivateExtern; + break; case MCSA_Protected: OS << "\t.protected\t"; break; case MCSA_Reference: OS << "\t.reference\t"; break; case MCSA_Weak: OS << "\t.weak\t"; break; - case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break; + case MCSA_WeakDefinition: + OS << "\t.weak_definition\t"; + FlagMap[Symbol] |= EHWeakDefinition; + break; // .weak_reference case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break; case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; @@ -512,9 +566,8 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size, } void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace) { + unsigned AddrSpace) { assert(getCurrentSection() && "Cannot emit contents before setting section!"); - assert(!isPCRel && "Cannot emit pc relative relocations!"); const char *Directive = 0; switch (Size) { default: break; @@ -543,10 +596,10 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, EmitEOL(); } -void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) { +void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue)) { - EmitULEB128IntValue(IntValue, AddrSpace); + EmitULEB128IntValue(IntValue); return; } assert(MAI.hasLEB128() && "Cannot print a .uleb"); @@ -554,10 +607,10 @@ void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) { EmitEOL(); } -void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace) { +void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue)) { - EmitSLEB128IntValue(IntValue, AddrSpace); + EmitSLEB128IntValue(IntValue); return; } assert(MAI.hasLEB128() && "Cannot print a .sleb"); @@ -673,9 +726,10 @@ bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){ void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, - unsigned Discriminator) { + unsigned Discriminator, + StringRef FileName) { this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags, - Isa, Discriminator); + Isa, Discriminator, FileName); if (!UseLoc) return; @@ -701,78 +755,144 @@ void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, OS << "isa " << Isa; if (Discriminator) OS << "discriminator " << Discriminator; + + if (IsVerboseAsm) { + OS.PadToColumn(MAI.getCommentColumn()); + OS << MAI.getCommentString() << ' ' << FileName << ':' + << Line << ':' << Column; + } EmitEOL(); } -bool MCAsmStreamer::EmitCFIStartProc() { 
- if (this->MCStreamer::EmitCFIStartProc()) - return true; +void MCAsmStreamer::EmitCFIStartProc() { + MCStreamer::EmitCFIStartProc(); + + if (!UseCFI) + return; OS << "\t.cfi_startproc"; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIEndProc() { - if (this->MCStreamer::EmitCFIEndProc()) - return true; +void MCAsmStreamer::EmitCFIEndProc() { + MCStreamer::EmitCFIEndProc(); + + if (!UseCFI) + return; OS << "\t.cfi_endproc"; EmitEOL(); +} + +void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { + MCStreamer::EmitCFIDefCfa(Register, Offset); + + if (!UseCFI) + return; - return false; + OS << ".cfi_def_cfa " << Register << ", " << Offset; + EmitEOL(); } -bool MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) { - if (this->MCStreamer::EmitCFIDefCfaOffset(Offset)) - return true; +void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) { + MCStreamer::EmitCFIDefCfaOffset(Offset); + + if (!UseCFI) + return; OS << "\t.cfi_def_cfa_offset " << Offset; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { - if (this->MCStreamer::EmitCFIDefCfaRegister(Register)) - return true; +void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { + MCStreamer::EmitCFIDefCfaRegister(Register); + + if (!UseCFI) + return; OS << "\t.cfi_def_cfa_register " << Register; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { - if (this->MCStreamer::EmitCFIOffset(Register, Offset)) - return true; +void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { + this->MCStreamer::EmitCFIOffset(Register, Offset); + + if (!UseCFI) + return; OS << "\t.cfi_offset " << Register << ", " << Offset; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym, +void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) { - if (this->MCStreamer::EmitCFIPersonality(Sym, Encoding)) - return true; + MCStreamer::EmitCFIPersonality(Sym, Encoding); + + if (!UseCFI) + return; OS << "\t.cfi_personality " << Encoding << ", " << *Sym; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { - if (this->MCStreamer::EmitCFILsda(Sym, Encoding)) - return true; +void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { + MCStreamer::EmitCFILsda(Sym, Encoding); + + if (!UseCFI) + return; OS << "\t.cfi_lsda " << Encoding << ", " << *Sym; EmitEOL(); +} + +void MCAsmStreamer::EmitCFIRememberState() { + MCStreamer::EmitCFIRememberState(); + + if (!UseCFI) + return; + + OS << "\t.cfi_remember_state"; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIRestoreState() { + MCStreamer::EmitCFIRestoreState(); + + if (!UseCFI) + return; - return false; + OS << "\t.cfi_restore_state"; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFISameValue(int64_t Register) { + MCStreamer::EmitCFISameValue(Register); + + if (!UseCFI) + return; + + OS << "\t.cfi_same_value " << Register; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) { + MCStreamer::EmitCFIRelOffset(Register, Offset); + + if (!UseCFI) + return; + + OS << "\t.cfi_rel_offset " << Register << ", " << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) { + MCStreamer::EmitCFIAdjustCfaOffset(Adjustment); + + if (!UseCFI) + return; + + OS << "\t.cfi_adjust_cfa_offset " << Adjustment; + EmitEOL(); } void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { @@ -834,13 +954,13 @@ void 
MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { OS << "0b"; for (unsigned j = 8; j--;) { unsigned Bit = (Code[i] >> j) & 1; - + unsigned FixupBit; if (getContext().getTargetAsmInfo().isLittleEndian()) FixupBit = i * 8 + j; else FixupBit = i * 8 + (7-j); - + if (uint8_t MapEntry = FixupMap[FixupBit]) { assert(Bit == 0 && "Encoder wrote into fixed up bit!"); OS << char('A' + MapEntry - 1); @@ -859,12 +979,64 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { } } +void MCAsmStreamer::EmitFnStart() { + OS << "\t.fnstart"; + EmitEOL(); +} + +void MCAsmStreamer::EmitFnEnd() { + OS << "\t.fnend"; + EmitEOL(); +} + +void MCAsmStreamer::EmitCantUnwind() { + OS << "\t.cantunwind"; + EmitEOL(); +} + +void MCAsmStreamer::EmitHandlerData() { + OS << "\t.handlerdata"; + EmitEOL(); +} + +void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) { + OS << "\t.personality " << Personality->getName(); + EmitEOL(); +} + +void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { + OS << "\t.setfp\t" << InstPrinter->getRegName(FpReg) + << ", " << InstPrinter->getRegName(SpReg); + if (Offset) + OS << ", #" << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitPad(int64_t Offset) { + OS << "\t.pad\t#" << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector) { + assert(RegList.size() && "RegList should not be empty"); + if (isVector) + OS << "\t.vsave\t{"; + else + OS << "\t.save\t{"; + + OS << InstPrinter->getRegName(RegList[0]); + + for (unsigned i = 1, e = RegList.size(); i != e; ++i) + OS << ", " << InstPrinter->getRegName(RegList[i]); + + OS << "}"; + EmitEOL(); +} + void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { assert(getCurrentSection() && "Cannot emit contents before setting section!"); - if (!UseLoc) - MCLineEntry::Make(this, getCurrentSection()); - // Show the encoding in a comment if we have a code emitter. if (Emitter) AddEncodingComment(Inst); @@ -897,13 +1069,17 @@ void MCAsmStreamer::Finish() { // Dump out the dwarf file & directory tables and line tables. if (getContext().hasDwarfFiles() && !UseLoc) MCDwarfFileTable::Emit(this); + + if (getNumFrameInfos() && !UseCFI) + MCDwarfFrameEmitter::Emit(*this, false); } MCStreamer *llvm::createAsmStreamer(MCContext &Context, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, + bool useCFI, MCInstPrinter *IP, MCCodeEmitter *CE, TargetAsmBackend *TAB, bool ShowInst) { - return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, + return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI, IP, CE, TAB, ShowInst); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 9992646..8360fc9f 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -28,7 +28,6 @@ #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" -#include <vector> using namespace llvm; namespace { @@ -103,6 +102,33 @@ uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { } uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const { + const MCSymbol &S = SD->getSymbol(); + + // If this is a variable, then recursively evaluate now. + if (S.isVariable()) { + MCValue Target; + if (!S.getVariableValue()->EvaluateAsRelocatable(Target, *this)) + report_fatal_error("unable to evaluate offset for variable '" + + S.getName() + "'"); + + // Verify that any used symbols are defined. 
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymA()->getSymbol().getName() + "'"); + if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymB()->getSymbol().getName() + "'"); + + uint64_t Offset = Target.getConstant(); + if (Target.getSymA()) + Offset += getSymbolOffset(&Assembler.getSymbolData( + Target.getSymA()->getSymbol())); + if (Target.getSymB()) + Offset -= getSymbolOffset(&Assembler.getSymbolData( + Target.getSymB()->getSymbol())); + return Offset; + } + assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!"); return getFragmentOffset(SD->getFragment()) + SD->getOffset(); } @@ -692,7 +718,9 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout, bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { int64_t Value = 0; uint64_t OldSize = LF.getContents().size(); - LF.getValue().EvaluateAsAbsolute(Value, Layout); + bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout); + (void)IsAbs; + assert(IsAbs); SmallString<8> &Data = LF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); @@ -731,7 +759,8 @@ bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout, SmallString<8> &Data = DF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); - MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE); + const TargetAsmInfo &AsmInfo = getContext().getTargetAsmInfo(); + MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE, AsmInfo); OSE.flush(); return OldSize != Data.size(); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 018f00c..8faa72e 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -27,8 +27,11 @@ typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) : - MAI(mai), TAI(tai), NextUniqueID(0), - CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0) { + MAI(mai), TAI(tai), + Allocator(), Symbols(Allocator), UsedNames(Allocator), + NextUniqueID(0), + CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0), + AllowTemporaryLabels(true) { MachOUniquingMap = 0; ELFUniquingMap = 0; COFFUniquingMap = 0; @@ -76,18 +79,19 @@ MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { } MCSymbol *MCContext::CreateSymbol(StringRef Name) { - // Determine whether this is an assembler temporary or normal label. - bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix()); + // Determine whether this is an assembler temporary or normal label, if used. 
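The variable-symbol branch of getSymbolOffset above therefore computes Constant + offset(SymA) - offset(SymB). A made-up example: if X is defined as A - B + 8, with A at file offset 0x40 and B at 0x10, getSymbolOffset for X yields 8 + 0x40 - 0x10 = 0x38.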
+ bool isTemporary = false; + if (AllowTemporaryLabels) + isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix()); StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name); if (NameEntry->getValue()) { assert(isTemporary && "Cannot rename non temporary symbols"); - SmallString<128> NewName; + SmallString<128> NewName = Name; do { - Twine T = Name + Twine(NextUniqueID++); - T.toVector(NewName); - StringRef foo = NewName; - NameEntry = &UsedNames.GetOrCreateValue(foo); + NewName.resize(Name.size()); + raw_svector_ostream(NewName) << NextUniqueID++; + NameEntry = &UsedNames.GetOrCreateValue(NewName); } while (NameEntry->getValue()); } NameEntry->setValue(true); @@ -107,9 +111,8 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { MCSymbol *MCContext::CreateTempSymbol() { SmallString<128> NameSV; - Twine Name = Twine(MAI.getPrivateGlobalPrefix()) + "tmp" + - Twine(NextUniqueID++); - Name.toVector(NameSV); + raw_svector_ostream(NameSV) + << MAI.getPrivateGlobalPrefix() << "tmp" << NextUniqueID++; return CreateSymbol(NameSV); } diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt index 5fa7b70..0ce359d 100644 --- a/lib/MC/MCDisassembler/CMakeLists.txt +++ b/lib/MC/MCDisassembler/CMakeLists.txt @@ -1,7 +1,8 @@ add_llvm_library(LLVMMCDisassembler + Disassembler.cpp EDDisassembler.cpp - EDOperand.cpp EDInst.cpp + EDOperand.cpp EDToken.cpp ) diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp new file mode 100644 index 0000000..ced57e8 --- /dev/null +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -0,0 +1,171 @@ +//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface -*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "Disassembler.h" +#include <stdio.h> +#include "llvm-c/Disassembler.h" + +#include <string> +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmInfo.h" // FIXME. +#include "llvm/Target/TargetMachine.h" // FIXME. +#include "llvm/Target/TargetSelect.h" +#include "llvm/Support/MemoryObject.h" + +namespace llvm { +class Target; +} // namespace llvm +using namespace llvm; + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// +// LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic +// disassembly is supported by passing a block of information in the DisInfo +// parameter and specifying the TagType and callback functions as described in +// the header llvm-c/Disassembler.h. The pointer to the block and the +// functions can all be passed as NULL. If successful, this returns a +// disassembler context; if not, it returns NULL. +// +LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, + int TagType, LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp) { + // Initialize targets and assembly printers/parsers. + llvm::InitializeAllTargetInfos(); + // FIXME: We shouldn't need to initialize the Target(Machine)s. + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllDisassemblers(); + + // Get the target.
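The renaming loop in CreateSymbol above resolves name clashes by truncating NewName back to the original length and appending the next counter value: if '.Lfoo' is already taken it tries '.Lfoo0', then '.Lfoo1', and so on until an unused name appears (names invented for illustration).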
+ std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + assert(TheTarget && "Unable to create target!"); + + // Get the assembler info needed to set up the MCContext. + const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName); + assert(MAI && "Unable to create target asm info!"); + + // Package up features to be passed to target/subtarget + std::string FeaturesStr; + + // FIXME: We shouldn't need to do this (and link in codegen). + // When we split this out, we should do it in a way that makes + // it straightforward to switch subtargets on the fly. + TargetMachine *TM = TheTarget->createTargetMachine(TripleName, FeaturesStr); + assert(TM && "Unable to create target machine!"); + + // Get the target assembler info needed to set up the context. + const TargetAsmInfo *tai = new TargetAsmInfo(*TM); + assert(tai && "Unable to create target assembler!"); + + // Set up the MCContext for creating symbols and MCExpr's. + MCContext *Ctx = new MCContext(*MAI, tai); + assert(Ctx && "Unable to create MCContext!"); + + // Set up disassembler. + MCDisassembler *DisAsm = TheTarget->createMCDisassembler(); + assert(DisAsm && "Unable to create disassembler!"); + DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx); + + // Set up the instruction printer. + int AsmPrinterVariant = MAI->getAssemblerDialect(); + MCInstPrinter *IP = TheTarget->createMCInstPrinter(*TM, AsmPrinterVariant, + *MAI); + assert(IP && "Unable to create instruction printer!"); + + LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType, + GetOpInfo, SymbolLookUp, + TheTarget, MAI, TM, tai, Ctx, + DisAsm, IP); + assert(DC && "Allocation failure!"); + return DC; +} + +// +// LLVMDisasmDispose() disposes of the disassembler specified by the context. +// +void LLVMDisasmDispose(LLVMDisasmContextRef DCR){ + LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR; + delete DC; +} + +namespace { +// +// The memory object created by LLVMDisasmInstruction(). +// +class DisasmMemoryObject : public MemoryObject { +private: + uint8_t *Bytes; + uint64_t Size; + uint64_t BasePC; +public: + DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) : + Bytes(bytes), Size(size), BasePC(basePC) {} + + uint64_t getBase() const { return BasePC; } + uint64_t getExtent() const { return Size; } + + int readByte(uint64_t Addr, uint8_t *Byte) const { + if (Addr - BasePC >= Size) + return -1; + *Byte = Bytes[Addr - BasePC]; + return 0; + } +}; +} // namespace + +// +// LLVMDisasmInstruction() disassembles a single instruction using the +// disassembler context specified in the parameter DC. The bytes of the +// instruction are specified in the parameter Bytes, which contains at least +// BytesSize bytes. The instruction is at the address specified by +// the PC parameter. If a valid instruction can be disassembled, its string is +// returned indirectly in OutString, whose size is specified in the +// parameter OutStringSize. This function returns the number of bytes in the +// instruction or zero if there was no valid instruction. If this function +// returns zero, the caller will have to pick how many bytes they want to step +// over by printing a .byte, .long etc. to continue. +// +size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, + uint64_t BytesSize, uint64_t PC, char *OutString, + size_t OutStringSize){ + LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR; + // Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject.
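Taken together, LLVMCreateDisasm, LLVMDisasmInstruction and LLVMDisasmDispose form the public C disassembly interface. A minimal caller might look like this, assuming the x86-64 target is compiled in (bytes and addresses invented; error handling elided):

#include "llvm-c/Disassembler.h"
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* No symbolic disassembly: DisInfo, GetOpInfo and SymbolLookUp are NULL. */
  LLVMDisasmContextRef DC =
      LLVMCreateDisasm("x86_64-unknown-linux-gnu", NULL, 0, NULL, NULL);
  if (!DC) return 1;

  uint8_t Bytes[] = { 0x55, 0x48, 0x89, 0xe5 }; /* push %rbp; mov %rsp,%rbp */
  char Insn[128];
  uint64_t PC = 0x1000, Pos = 0;
  while (Pos < sizeof(Bytes)) {
    size_t Size = LLVMDisasmInstruction(DC, Bytes + Pos, sizeof(Bytes) - Pos,
                                        PC + Pos, Insn, sizeof(Insn));
    if (!Size) break; /* not decodable: caller decides how many bytes to skip */
    printf("0x%llx:%s\n", (unsigned long long)(PC + Pos), Insn);
    Pos += Size;
  }
  LLVMDisasmDispose(DC);
  return 0;
}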
+ DisasmMemoryObject MemoryObject(Bytes, BytesSize, PC); + + uint64_t Size; + MCInst Inst; + const MCDisassembler *DisAsm = DC->getDisAsm(); + MCInstPrinter *IP = DC->getIP(); + if (!DisAsm->getInstruction(Inst, Size, MemoryObject, PC, /*REMOVE*/ nulls())) + return 0; + + std::string InsnStr; + raw_string_ostream OS(InsnStr); + IP->printInst(&Inst, OS); + OS.flush(); + + size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); + std::memcpy(OutString, InsnStr.data(), OutputSize); + OutString[OutputSize] = '\0'; // Terminate string. + + return Size; +} + +#ifdef __cplusplus +} +#endif // __cplusplus diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h new file mode 100644 index 0000000..f0ec42a --- /dev/null +++ b/lib/MC/MCDisassembler/Disassembler.h @@ -0,0 +1,96 @@ +//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Disassembly library's disassembler +// context. The disassembler is responsible for producing strings for +// individual instructions according to a given architecture and disassembly +// syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_DISASSEMBLER_H +#define LLVM_MC_DISASSEMBLER_H + +#include "llvm-c/Disassembler.h" +#include <string> +#include "llvm/ADT/OwningPtr.h" + +namespace llvm { +class TargetAsmInfo; +class MCContext; +class MCAsmInfo; +class MCDisassembler; +class MCInstPrinter; +class Target; +class TargetMachine; + +// +// This is the disassembler context returned by LLVMCreateDisasm(). +// +class LLVMDisasmContext { +private: + // + // The passed parameters when the disassembler context is created. + // + // The TripleName for this disassembler. + std::string TripleName; + // The pointer to the caller's block of symbolic information. + void *DisInfo; + // The Triple specific symbolic information type returned by GetOpInfo. + int TagType; + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The function to look up a symbol name. + LLVMSymbolLookupCallback SymbolLookUp; + // + // The objects created and saved by LLVMCreateDisasm() then used by + // LLVMDisasmInstruction(). + // + // The LLVM target corresponding to the disassembler. + // FIXME: using llvm::OwningPtr<const llvm::Target> causes a malloc error + // when this LLVMDisasmContext is deleted. + const Target *TheTarget; + // The assembly information for the target architecture. + llvm::OwningPtr<const llvm::MCAsmInfo> MAI; + // The target machine instance. + llvm::OwningPtr<llvm::TargetMachine> TM; + // The target assembler information for the target architecture. + // FIXME: using llvm::OwningPtr<const llvm::TargetAsmInfo> causes a malloc + // error when this LLVMDisasmContext is deleted. + const TargetAsmInfo *Tai; + // The assembly context for creating symbols and MCExprs. + llvm::OwningPtr<const llvm::MCContext> Ctx; + // The disassembler for the target architecture. + llvm::OwningPtr<const llvm::MCDisassembler> DisAsm; + // The instruction printer for the target architecture.
+ llvm::OwningPtr<llvm::MCInstPrinter> IP; + +public: + LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType, + LLVMOpInfoCallback getOpInfo, + LLVMSymbolLookupCallback symbolLookUp, + const Target *theTarget, const MCAsmInfo *mAI, + llvm::TargetMachine *tM, const TargetAsmInfo *tai, + llvm::MCContext *ctx, const MCDisassembler *disAsm, + MCInstPrinter *iP) : TripleName(tripleName), + DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo), + SymbolLookUp(symbolLookUp), TheTarget(theTarget), Tai(tai) { + TM.reset(tM); + MAI.reset(mAI); + Ctx.reset(ctx); + DisAsm.reset(disAsm); + IP.reset(iP); + } + const MCDisassembler *getDisAsm() const { return DisAsm.get(); } + MCInstPrinter *getIP() { return IP.get(); } +}; + +} // namespace llvm + +#endif diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index 2fd14db..91c5284 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -193,7 +193,8 @@ EDDisassembler::EDDisassembler(CPUKey &key) : InstString.reset(new std::string); InstStream.reset(new raw_string_ostream(*InstString)); - InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); + InstPrinter.reset(Tgt->createMCInstPrinter(*TargetMachine, LLVMSyntaxVariant, + *AsmInfo)); if (!InstPrinter) return; @@ -253,9 +254,11 @@ EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, delete inst; return NULL; } else { - const llvm::EDInstInfo *thisInstInfo; + const llvm::EDInstInfo *thisInstInfo = NULL; - thisInstInfo = &InstInfos[inst->getOpcode()]; + if (InstInfos) { + thisInstInfo = &InstInfos[inst->getOpcode()]; + } EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); return sdInst; @@ -331,6 +334,15 @@ int EDDisassembler::printInst(std::string &str, MCInst &inst) { return 0; } +static void diag_handler(const SMDiagnostic &diag, + void *context) +{ + if (context) { + EDDisassembler *disassembler = static_cast<EDDisassembler*>(context); + diag.Print("", disassembler->ErrorStream); + } +} + int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, SmallVectorImpl<AsmToken> &tokens, const std::string &str) { @@ -353,6 +365,7 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, SMLoc instLoc; SourceMgr sourceMgr; + sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this)); sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over MCContext context(*AsmInfo, NULL); OwningPtr<MCStreamer> streamer(createNullStreamer(context)); diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h index 71e45f0..2fcc09d 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.h +++ b/lib/MC/MCDisassembler/EDDisassembler.h @@ -87,13 +87,8 @@ struct EDDisassembler { /// operator< - Less-than operator bool operator<(const CPUKey &key) const { - if(Arch > key.Arch) - return false; - else if (Arch == key.Arch) { - if(Syntax > key.Syntax) - return false; - } - return true; + return ((Arch < key.Arch) || + ((Arch == key.Arch) && Syntax < (key.Syntax))); } }; diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h index 627c066..ad57282 100644 --- a/lib/MC/MCDisassembler/EDInfo.h +++ b/lib/MC/MCDisassembler/EDInfo.h @@ -35,6 +35,7 @@ enum OperandTypes { kOperandTypeARMAddrMode5, kOperandTypeARMAddrMode6, kOperandTypeARMAddrMode6Offset, + kOperandTypeARMAddrMode7, kOperandTypeARMAddrModePC, kOperandTypeARMRegisterList, kOperandTypeARMTBAddrMode, @@ -51,7 
+52,8 @@ enum OperandTypes { kOperandTypeThumb2AddrModeImm12, kOperandTypeThumb2AddrModeSoReg, kOperandTypeThumb2AddrModeImm8s4, - kOperandTypeThumb2AddrModeImm8s4Offset + kOperandTypeThumb2AddrModeImm8s4Offset, + kOperandTypeThumb2AddrModeReg }; enum OperandFlags { diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp index 63b049f..6057e16 100644 --- a/lib/MC/MCDisassembler/EDInst.cpp +++ b/lib/MC/MCDisassembler/EDInst.cpp @@ -165,6 +165,9 @@ int EDInst::getOperand(EDOperand *&operand, unsigned int index) { int EDInst::tokenize() { if (TokenizeResult.valid()) return TokenizeResult.result(); + + if (ThisInstInfo == NULL) + return TokenizeResult.setResult(-1); if (stringify()) return TokenizeResult.setResult(-1); diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp index 2b0c73e..492bb08 100644 --- a/lib/MC/MCDisassembler/EDOperand.cpp +++ b/lib/MC/MCDisassembler/EDOperand.cpp @@ -73,6 +73,8 @@ EDOperand::EDOperand(const EDDisassembler &disassembler, case kOperandTypeThumb2AddrModeImm8Offset: case kOperandTypeARMTBAddrMode: case kOperandTypeThumb2AddrModeImm8s4Offset: + case kOperandTypeARMAddrMode7: + case kOperandTypeThumb2AddrModeReg: numMCOperands = 1; break; case kOperandTypeThumb2SoReg: @@ -196,15 +198,24 @@ int EDOperand::evaluate(uint64_t &result, default: return -1; case kOperandTypeImmediate: + if (!Inst.Inst->getOperand(MCOpIndex).isImm()) + return -1; + result = Inst.Inst->getOperand(MCOpIndex).getImm(); return 0; case kOperandTypeRegister: { + if (!Inst.Inst->getOperand(MCOpIndex).isReg()) + return -1; + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); return callback(&result, reg, arg); } case kOperandTypeARMBranchTarget: { + if (!Inst.Inst->getOperand(MCOpIndex).isImm()) + return -1; + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); uint64_t pcVal; @@ -256,6 +267,7 @@ int EDOperand::isMemory() { case kOperandTypeARMAddrMode4: case kOperandTypeARMAddrMode5: case kOperandTypeARMAddrMode6: + case kOperandTypeARMAddrMode7: case kOperandTypeARMAddrModePC: case kOperandTypeARMBranchTarget: case kOperandTypeThumbAddrModeS1: @@ -269,6 +281,7 @@ int EDOperand::isMemory() { case kOperandTypeThumb2AddrModeImm12: case kOperandTypeThumb2AddrModeSoReg: case kOperandTypeThumb2AddrModeImm8s4: + case kOperandTypeThumb2AddrModeReg: return 1; } } diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 112d7d8..f61f0c2 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -439,14 +440,100 @@ static int getDataAlignmentFactor(MCStreamer &streamer) { return -size; } -static void EmitCFIInstruction(MCStreamer &Streamer, - const MCCFIInstruction &Instr) { +static unsigned getSizeForEncoding(MCStreamer &streamer, + unsigned symbolEncoding) { + MCContext &context = streamer.getContext(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + unsigned format = symbolEncoding & 0x0f; + switch (format) { + default: + assert(0 && "Unknown Encoding"); + case dwarf::DW_EH_PE_absptr: + case dwarf::DW_EH_PE_signed: + return asmInfo.getPointerSize(); + case dwarf::DW_EH_PE_udata2: + case dwarf::DW_EH_PE_sdata2: + return 2; + case dwarf::DW_EH_PE_udata4: + case dwarf::DW_EH_PE_sdata4: + return 4; + case dwarf::DW_EH_PE_udata8: + case 
dwarf::DW_EH_PE_sdata8: + return 8; + } +} + +static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, + unsigned symbolEncoding) { + MCContext &context = streamer.getContext(); + const MCAsmInfo &asmInfo = context.getAsmInfo(); + const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol, + symbolEncoding, + streamer); + unsigned size = getSizeForEncoding(streamer, symbolEncoding); + streamer.EmitAbsValue(v, size); +} + +static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, + unsigned symbolEncoding) { + MCContext &context = streamer.getContext(); + const MCAsmInfo &asmInfo = context.getAsmInfo(); + const MCExpr *v = asmInfo.getExprForPersonalitySymbol(&symbol, + symbolEncoding, + streamer); + unsigned size = getSizeForEncoding(streamer, symbolEncoding); + streamer.EmitValue(v, size); +} + +static const MachineLocation TranslateMachineLocation( + const TargetAsmInfo &AsmInfo, + const MachineLocation &Loc) { + unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? + MachineLocation::VirtualFP : + unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true)); + const MachineLocation &NewLoc = Loc.isReg() ? + MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); + return NewLoc; +} + +namespace { + class FrameEmitterImpl { + int CFAOffset; + int CIENum; + bool UsingCFI; + + public: + FrameEmitterImpl(bool usingCFI) : CFAOffset(0), CIENum(0), + UsingCFI(usingCFI) { + } + + const MCSymbol &EmitCIE(MCStreamer &streamer, + const MCSymbol *personality, + unsigned personalityEncoding, + const MCSymbol *lsda, + unsigned lsdaEncoding); + MCSymbol *EmitFDE(MCStreamer &streamer, + const MCSymbol &cieStart, + const MCDwarfFrameInfo &frame, + bool forceLsda); + void EmitCFIInstructions(MCStreamer &streamer, + const std::vector<MCCFIInstruction> &Instrs, + MCSymbol *BaseLabel); + void EmitCFIInstruction(MCStreamer &Streamer, + const MCCFIInstruction &Instr); + }; +} + +void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, + const MCCFIInstruction &Instr) { int dataAlignmentFactor = getDataAlignmentFactor(Streamer); switch (Instr.getOperation()) { - case MCCFIInstruction::Move: { + case MCCFIInstruction::Move: + case MCCFIInstruction::RelMove: { const MachineLocation &Dst = Instr.getDestination(); const MachineLocation &Src = Instr.getSource(); + const bool IsRelative = Instr.getOperation() == MCCFIInstruction::RelMove; // If advancing cfa. 
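Note that getSizeForEncoding above masks the encoding down to its low four bits (the value format), so application modifiers such as DW_EH_PE_pcrel never change the size: DW_EH_PE_pcrel | DW_EH_PE_sdata4 still occupies 4 bytes, and DW_EH_PE_absptr always takes a full pointer-size slot.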
if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { @@ -459,7 +546,12 @@ static void EmitCFIInstruction(MCStreamer &Streamer, Streamer.EmitULEB128IntValue(Src.getReg()); } - Streamer.EmitULEB128IntValue(-Src.getOffset(), 1); + if (IsRelative) + CFAOffset += Src.getOffset(); + else + CFAOffset = -Src.getOffset(); + + Streamer.EmitULEB128IntValue(CFAOffset); return; } @@ -471,7 +563,11 @@ static void EmitCFIInstruction(MCStreamer &Streamer, } unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset() / dataAlignmentFactor; + + int Offset = Dst.getOffset(); + if (IsRelative) + Offset -= CFAOffset; + Offset = Offset / dataAlignmentFactor; if (Offset < 0) { Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1); @@ -479,11 +575,11 @@ static void EmitCFIInstruction(MCStreamer &Streamer, Streamer.EmitSLEB128IntValue(Offset); } else if (Reg < 64) { Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1); - Streamer.EmitULEB128IntValue(Offset, 1); + Streamer.EmitULEB128IntValue(Offset); } else { Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1); - Streamer.EmitULEB128IntValue(Reg, 1); - Streamer.EmitULEB128IntValue(Offset, 1); + Streamer.EmitULEB128IntValue(Reg); + Streamer.EmitULEB128IntValue(Offset); } return; } @@ -493,15 +589,21 @@ static void EmitCFIInstruction(MCStreamer &Streamer, case MCCFIInstruction::Restore: Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1); return; + case MCCFIInstruction::SameValue: { + unsigned Reg = Instr.getDestination().getReg(); + Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1); + Streamer.EmitULEB128IntValue(Reg); + return; + } } llvm_unreachable("Unhandled case in switch"); } /// EmitFrameMoves - Emit frame instructions to describe the layout of the /// frame. -static void EmitCFIInstructions(MCStreamer &streamer, - const std::vector<MCCFIInstruction> &Instrs, - MCSymbol *BaseLabel) { +void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, + const std::vector<MCCFIInstruction> &Instrs, + MCSymbol *BaseLabel) { for (unsigned i = 0, N = Instrs.size(); i < N; ++i) { const MCCFIInstruction &Instr = Instrs[i]; MCSymbol *Label = Instr.getLabel(); @@ -521,74 +623,31 @@ static void EmitCFIInstructions(MCStreamer &streamer, } } -static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, - unsigned symbolEncoding) { - MCContext &context = streamer.getContext(); - const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); - unsigned format = symbolEncoding & 0x0f; - unsigned application = symbolEncoding & 0x70; - unsigned size; - switch (format) { - default: - assert(0 && "Unknown Encoding"); - case dwarf::DW_EH_PE_absptr: - case dwarf::DW_EH_PE_signed: - size = asmInfo.getPointerSize(); - break; - case dwarf::DW_EH_PE_udata2: - case dwarf::DW_EH_PE_sdata2: - size = 2; - break; - case dwarf::DW_EH_PE_udata4: - case dwarf::DW_EH_PE_sdata4: - size = 4; - break; - case dwarf::DW_EH_PE_udata8: - case dwarf::DW_EH_PE_sdata8: - size = 8; - break; - } - switch (application) { - default: - assert(0 && "Unknown Encoding"); - break; - case 0: - streamer.EmitSymbolValue(&symbol, size); - break; - case dwarf::DW_EH_PE_pcrel: - streamer.EmitPCRelSymbolValue(&symbol, size); - break; - } -} - -static const MachineLocation TranslateMachineLocation( - const TargetAsmInfo &AsmInfo, - const MachineLocation &Loc) { - unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? - MachineLocation::VirtualFP : - unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true)); - const MachineLocation &NewLoc = Loc.isReg() ? 
- MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); - return NewLoc; -} -static const MCSymbol &EmitCIE(MCStreamer &streamer, - const MCSymbol *personality, - unsigned personalityEncoding, - const MCSymbol *lsda, - unsigned lsdaEncoding) { +const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, + const MCSymbol *personality, + unsigned personalityEncoding, + const MCSymbol *lsda, + unsigned lsdaEncoding) { MCContext &context = streamer.getContext(); const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); const MCSection &section = *asmInfo.getEHFrameSection(); streamer.SwitchSection(&section); - MCSymbol *sectionStart = streamer.getContext().CreateTempSymbol(); + + MCSymbol *sectionStart; + if (asmInfo.isFunctionEHFrameSymbolPrivate()) + sectionStart = context.CreateTempSymbol(); + else + sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum)); + + CIENum++; + MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol(); // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart, *sectionEnd, 4); streamer.EmitLabel(sectionStart); - streamer.EmitValue(Length, 4); + streamer.EmitAbsValue(Length, 4); // CIE ID streamer.EmitIntValue(0, 4); @@ -617,28 +676,35 @@ static const MCSymbol &EmitCIE(MCStreamer &streamer, streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true)); // Augmentation Data Length (optional) - MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol(); - MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol(); - const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer, - *augmentationStart, - *augmentationEnd, 0); - streamer.EmitULEB128Value(augmentationLength); + + unsigned augmentationLength = 0; + if (personality) { + // Personality Encoding + augmentationLength += 1; + // Personality + augmentationLength += getSizeForEncoding(streamer, personalityEncoding); + } + if (lsda) { + augmentationLength += 1; + } + // Encoding of the FDE pointers + augmentationLength += 1; + + streamer.EmitULEB128IntValue(augmentationLength); // Augmentation Data (optional) - streamer.EmitLabel(augmentationStart); if (personality) { // Personality Encoding + streamer.EmitIntValue(personalityEncoding, 1); // Personality - EmitSymbol(streamer, *personality, personalityEncoding); + EmitPersonality(streamer, *personality, personalityEncoding); } if (lsda) { // LSDA Encoding streamer.EmitIntValue(lsdaEncoding, 1); } // Encoding of the FDE pointers - streamer.EmitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1); - streamer.EmitLabel(augmentationEnd); + streamer.EmitIntValue(asmInfo.getFDEEncoding(UsingCFI), 1); // Initial Instructions @@ -664,50 +730,66 @@ static const MCSymbol &EmitCIE(MCStreamer &streamer, return *sectionStart; } -static MCSymbol *EmitFDE(MCStreamer &streamer, - const MCSymbol &cieStart, - const MCDwarfFrameInfo &frame) { +MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, + const MCSymbol &cieStart, + const MCDwarfFrameInfo &frame, + bool forceLsda) { MCContext &context = streamer.getContext(); MCSymbol *fdeStart = context.CreateTempSymbol(); MCSymbol *fdeEnd = context.CreateTempSymbol(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + + if (!asmInfo.isFunctionEHFrameSymbolPrivate()) { + Twine EHName = frame.Function->getName() + Twine(".eh"); + MCSymbol *EHSym = context.GetOrCreateSymbol(EHName); + streamer.EmitEHSymAttributes(frame.Function, EHSym); + streamer.EmitLabel(EHSym); + } // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer,
*fdeStart, *fdeEnd, 0); - streamer.EmitValue(Length, 4); + streamer.EmitAbsValue(Length, 4); streamer.EmitLabel(fdeStart); // CIE Pointer const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart, 0); - streamer.EmitValue(offset, 4); + streamer.EmitAbsValue(offset, 4); + unsigned fdeEncoding = asmInfo.getFDEEncoding(UsingCFI); + unsigned size = getSizeForEncoding(streamer, fdeEncoding); // PC Begin - streamer.EmitPCRelSymbolValue(frame.Begin, 4); + EmitSymbol(streamer, *frame.Begin, fdeEncoding); // PC Range const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, *frame.End, 0); - streamer.EmitValue(Range, 4); + streamer.EmitAbsValue(Range, size); // Augmentation Data Length - MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol(); - MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol(); - const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer, - *augmentationStart, - *augmentationEnd, 0); - streamer.EmitULEB128Value(augmentationLength); + unsigned augmentationLength = 0; + + if (frame.Lsda || forceLsda) + augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding); + + streamer.EmitULEB128IntValue(augmentationLength); // Augmentation Data - streamer.EmitLabel(augmentationStart); + + // When running in "CodeGen compatibility mode" an FDE with no LSDA can be + // assigned to a CIE that requires one. In that case we output a 0 (as does + // CodeGen). if (frame.Lsda) EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding); - streamer.EmitLabel(augmentationEnd); + else if (forceLsda) + streamer.EmitIntValue(0, getSizeForEncoding(streamer, frame.LsdaEncoding)); + // Call Frame Instructions EmitCFIInstructions(streamer, frame.Instructions, frame.Begin); // Padding - streamer.EmitValueToAlignment(4); + streamer.EmitValueToAlignment(size); return fdeEnd; } @@ -753,11 +835,78 @@ namespace llvm { }; } -void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) { +// This is an implementation of CIE and FDE emission that is bug-for-bug +// compatible with the one in CodeGen. It is useful during the transition +// to make it easy to compare the outputs, but should probably be removed +// afterwards.
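An aside before the Darwin-compatibility path below: the augmentation length fields in EmitCIE and EmitFDE are now written with EmitULEB128IntValue because the length is computed up front instead of being taken as a start/end label difference. A minimal standalone sketch of the DWARF ULEB128 encoding those calls produce (illustrative only, not LLVM's implementation):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // ULEB128: emit 7 bits per byte, least significant group first;
    // the high bit of each byte marks "more bytes follow".
    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Bytes;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // Continuation bit.
        Bytes.push_back(Byte);
      } while (Value != 0);
      return Bytes;
    }

    int main() {
      // 624485 encodes as e5 8e 26 (the classic example from the DWARF spec);
      // small augmentation lengths fit in a single byte.
      for (uint8_t B : encodeULEB128(624485))
        std::printf("%02x ", B);
      std::printf("\n");
      return 0;
    }

Since the ULEB128 of a known integer is a fixed byte sequence, no temporary labels or relaxation are needed, which is exactly what the rewritten augmentation-length code exploits.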
+void MCDwarfFrameEmitter::EmitDarwin(MCStreamer &streamer, + bool usingCFI) { + FrameEmitterImpl Emitter(usingCFI); + DenseMap<const MCSymbol*, const MCSymbol*> Personalities; + const MCSymbol *aCIE = NULL; + const MCDwarfFrameInfo *aFrame = NULL; + + for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) { + const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i); + if (!frame.Personality) + continue; + if (Personalities.count(frame.Personality)) + continue; + + const MCSymbol *cieStart = &Emitter.EmitCIE(streamer, frame.Personality, + frame.PersonalityEncoding, + frame.Lsda, + frame.LsdaEncoding); + aCIE = cieStart; + aFrame = &frame; + Personalities[frame.Personality] = cieStart; + } + + if (Personalities.empty()) { + const MCDwarfFrameInfo &frame = streamer.getFrameInfo(0); + aCIE = &Emitter.EmitCIE(streamer, frame.Personality, + frame.PersonalityEncoding, frame.Lsda, + frame.LsdaEncoding); + aFrame = &frame; + } + + MCSymbol *fdeEnd = NULL; + for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) { + const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i); + const MCSymbol *cieStart = Personalities[frame.Personality]; + bool hasLSDA; + if (!cieStart) { + cieStart = aCIE; + hasLSDA = aFrame->Lsda; + } else { + hasLSDA = true; + } + + fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame, + hasLSDA); + if (i != n - 1) + streamer.EmitLabel(fdeEnd); + } + + const MCContext &context = streamer.getContext(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + streamer.EmitValueToAlignment(asmInfo.getPointerSize()); + if (fdeEnd) + streamer.EmitLabel(fdeEnd); +} + +void MCDwarfFrameEmitter::Emit(MCStreamer &streamer, + bool usingCFI) { const MCContext &context = streamer.getContext(); const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + if (!asmInfo.isFunctionEHFrameSymbolPrivate()) { + EmitDarwin(streamer, usingCFI); + return; + } + MCSymbol *fdeEnd = NULL; DenseMap<CIEKey, const MCSymbol*> CIEStarts; + FrameEmitterImpl Emitter(usingCFI); for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i); @@ -765,10 +914,10 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) { frame.LsdaEncoding); const MCSymbol *&cieStart = CIEStarts[key]; if (!cieStart) - cieStart = &EmitCIE(streamer, frame.Personality, - frame.PersonalityEncoding, frame.Lsda, - frame.LsdaEncoding); - fdeEnd = EmitFDE(streamer, *cieStart, frame); + cieStart = &Emitter.EmitCIE(streamer, frame.Personality, - frame.PersonalityEncoding, frame.Lsda, - frame.LsdaEncoding); + frame.PersonalityEncoding, frame.Lsda, + frame.LsdaEncoding); + fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame, false); if (i != n - 1) streamer.EmitLabel(fdeEnd); } @@ -782,21 +931,28 @@ void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer, uint64_t AddrDelta) { SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS); + const TargetAsmInfo &AsmInfo = Streamer.getContext().getTargetAsmInfo(); + MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS, AsmInfo); Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0); } void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta, - raw_ostream &OS) { + raw_ostream &OS, + const TargetAsmInfo &AsmInfo) { + // This is a small hack to facilitate the transition to CFI on OS X. It + // relaxes all address advances, which lets us produce identical output + // to the one produced by CodeGen. + const bool Relax = !AsmInfo.isFunctionEHFrameSymbolPrivate(); + + // FIXME: Assumes the code alignment factor is 1.
if (AddrDelta == 0) { - } else if (isUIntN(6, AddrDelta)) { + } else if (isUIntN(6, AddrDelta) && !Relax) { uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta; OS << Opcode; - } else if (isUInt<8>(AddrDelta)) { + } else if (isUInt<8>(AddrDelta) && !Relax) { OS << uint8_t(dwarf::DW_CFA_advance_loc1); OS << uint8_t(AddrDelta); - } else if (isUInt<16>(AddrDelta)) { + } else if (isUInt<16>(AddrDelta) && !Relax) { // FIXME: check what is the correct behavior on a big endian machine. OS << uint8_t(dwarf::DW_CFA_advance_loc2); OS << uint8_t( AddrDelta & 0xff); diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp new file mode 100644 index 0000000..ce7783e --- /dev/null +++ b/lib/MC/MCELF.cpp @@ -0,0 +1,72 @@ +//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF object file writer information. +// +//===----------------------------------------------------------------------===// + +#include "MCELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetAsmBackend.h" + +namespace llvm { + +void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) { + assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || + Binding == ELF::STB_WEAK); + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift); + SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift)); +} + +unsigned MCELF::GetBinding(const MCSymbolData &SD) { + uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift; + assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || + Binding == ELF::STB_WEAK); + return Binding; +} + +void MCELF::SetType(MCSymbolData &SD, unsigned Type) { + assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || + Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || + Type == ELF::STT_FILE || Type == ELF::STT_COMMON || + Type == ELF::STT_TLS); + + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift); + SD.setFlags(OtherFlags | (Type << ELF_STT_Shift)); +} + +unsigned MCELF::GetType(const MCSymbolData &SD) { + uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift; + assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || + Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || + Type == ELF::STT_FILE || Type == ELF::STT_COMMON || + Type == ELF::STT_TLS); + return Type; +} + +void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) { + assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || + Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); + + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift); + SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift)); +} + +unsigned MCELF::GetVisibility(MCSymbolData &SD) { + unsigned Visibility = + (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift; + assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || + Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); + return Visibility; +} + +} diff --git a/lib/MC/MCELF.h b/lib/MC/MCELF.h new file mode 100644 index 0000000..e08f1e6 --- /dev/null +++ b/lib/MC/MCELF.h @@ -0,0 +1,35 @@ +//===- lib/MC/MCELF.h - ELF MC 
--------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains some support functions used by the ELF Streamer and +// ObjectWriter. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELF_H +#define LLVM_MC_MCELF_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { +class MCSymbolData; + +class MCELF { + public: + static void SetBinding(MCSymbolData &SD, unsigned Binding); + static unsigned GetBinding(const MCSymbolData &SD); + static void SetType(MCSymbolData &SD, unsigned Type); + static unsigned GetType(const MCSymbolData &SD); + static void SetVisibility(MCSymbolData &SD, unsigned Visibility); + static unsigned GetVisibility(MCSymbolData &SD); +}; + +} + +#endif diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index e49074d..be8e2e3 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -11,18 +11,14 @@ // //===----------------------------------------------------------------------===// +#include "MCELFStreamer.h" +#include "MCELF.h" #include "llvm/MC/MCStreamer.h" - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" @@ -34,148 +30,6 @@ using namespace llvm; -namespace { - -static void SetBinding(MCSymbolData &SD, unsigned Binding) { - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK); - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift); - SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift)); -} - -static unsigned GetBinding(const MCSymbolData &SD) { - uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift; - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK); - return Binding; -} - -static void SetType(MCSymbolData &SD, unsigned Type) { - assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || - Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || - Type == ELF::STT_FILE || Type == ELF::STT_COMMON || - Type == ELF::STT_TLS); - - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift); - SD.setFlags(OtherFlags | (Type << ELF_STT_Shift)); -} - -static void SetVisibility(MCSymbolData &SD, unsigned Visibility) { - assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || - Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); - - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift); - SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift)); -} - -class MCELFStreamer : public MCObjectStreamer { -public: - MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter) - : MCObjectStreamer(Context, TAB, OS, Emitter) {} - - ~MCELFStreamer() {} - - /// @name MCStreamer Interface - /// @{ - - virtual void InitSections(); - virtual void ChangeSection(const MCSection *Section); - virtual void EmitLabel(MCSymbol *Symbol); - virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); - 
virtual void EmitThumbFunc(MCSymbol *Func); - virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); - virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); - virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment); - virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitCOFFSymbolStorageClass(int StorageClass) { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitCOFFSymbolType(int Type) { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EndCOFFSymbolDef() { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SD.setSize(Value); - } - - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); - - virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment = 0) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitBytes(StringRef Data, unsigned AddrSpace); - virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, - unsigned ValueSize = 1, - unsigned MaxBytesToEmit = 0); - virtual void EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit = 0); - - virtual void EmitFileDirective(StringRef Filename); - - virtual void Finish(); - -private: - virtual void EmitInstToFragment(const MCInst &Inst); - virtual void EmitInstToData(const MCInst &Inst); - - void fixSymbolsInTLSFixups(const MCExpr *expr); - - struct LocalCommon { - MCSymbolData *SD; - uint64_t Size; - unsigned ByteAlignment; - }; - std::vector<LocalCommon> LocalCommons; - - SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet; - /// @} - void SetSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind) { - SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind)); - } - - void SetSectionData() { - SetSection(".data", ELF::SHT_PROGBITS, - ELF::SHF_WRITE |ELF::SHF_ALLOC, - SectionKind::getDataRel()); - EmitCodeAlignment(4, 0); - } - void SetSectionText() { - SetSection(".text", ELF::SHT_PROGBITS, - ELF::SHF_EXECINSTR | - ELF::SHF_ALLOC, SectionKind::getText()); - EmitCodeAlignment(4, 0); - } - void SetSectionBss() { - SetSection(".bss", ELF::SHT_NOBITS, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, SectionKind::getBSS()); - EmitCodeAlignment(4, 0); - } -}; - -} // end anonymous namespace. - void MCELFStreamer::InitSections() { // This emulates the same behavior of GNU as. This makes it easier // to compare the output as the major sections are in the same order. 
@@ -194,7 +48,7 @@ void MCELFStreamer::EmitLabel(MCSymbol *Symbol) { static_cast<const MCSectionELF&>(Symbol->getSection()); MCSymbolData &SD = getAssembler().getSymbolData(*Symbol); if (Section.getFlags() & ELF::SHF_TLS) - SetType(SD, ELF::STT_TLS); + MCELF::SetType(SD, ELF::STT_TLS); } void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -281,54 +135,54 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, break; case MCSA_Global: - SetBinding(SD, ELF::STB_GLOBAL); + MCELF::SetBinding(SD, ELF::STB_GLOBAL); SD.setExternal(true); BindingExplicitlySet.insert(Symbol); break; case MCSA_WeakReference: case MCSA_Weak: - SetBinding(SD, ELF::STB_WEAK); + MCELF::SetBinding(SD, ELF::STB_WEAK); SD.setExternal(true); BindingExplicitlySet.insert(Symbol); break; case MCSA_Local: - SetBinding(SD, ELF::STB_LOCAL); + MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); BindingExplicitlySet.insert(Symbol); break; case MCSA_ELF_TypeFunction: - SetType(SD, ELF::STT_FUNC); + MCELF::SetType(SD, ELF::STT_FUNC); break; case MCSA_ELF_TypeObject: - SetType(SD, ELF::STT_OBJECT); + MCELF::SetType(SD, ELF::STT_OBJECT); break; case MCSA_ELF_TypeTLS: - SetType(SD, ELF::STT_TLS); + MCELF::SetType(SD, ELF::STT_TLS); break; case MCSA_ELF_TypeCommon: - SetType(SD, ELF::STT_COMMON); + MCELF::SetType(SD, ELF::STT_COMMON); break; case MCSA_ELF_TypeNoType: - SetType(SD, ELF::STT_NOTYPE); + MCELF::SetType(SD, ELF::STT_NOTYPE); break; case MCSA_Protected: - SetVisibility(SD, ELF::STV_PROTECTED); + MCELF::SetVisibility(SD, ELF::STV_PROTECTED); break; case MCSA_Hidden: - SetVisibility(SD, ELF::STV_HIDDEN); + MCELF::SetVisibility(SD, ELF::STV_HIDDEN); break; case MCSA_Internal: - SetVisibility(SD, ELF::STV_INTERNAL); + MCELF::SetVisibility(SD, ELF::STV_INTERNAL); break; } } @@ -338,13 +192,13 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); if (!BindingExplicitlySet.count(Symbol)) { - SetBinding(SD, ELF::STB_GLOBAL); + MCELF::SetBinding(SD, ELF::STB_GLOBAL); SD.setExternal(true); } - SetType(SD, ELF::STT_OBJECT); + MCELF::SetType(SD, ELF::STT_OBJECT); - if (GetBinding(SD) == ELF_STB_Local) { + if (MCELF::GetBinding(SD) == ELF_STB_Local) { const MCSection *Section = getAssembler().getContext().getELFSection(".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | @@ -364,7 +218,7 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { // FIXME: Should this be caught and done earlier? MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SetBinding(SD, ELF::STB_LOCAL); + MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); BindingExplicitlySet.insert(Symbol); // FIXME: ByteAlignment is not needed here, but is required. 
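The MCELF::Set*/Get* calls that replace the old file-local helpers all follow the same read-modify-write pattern on the symbol's flags word, using the ELF_STB/STT/STV shift constants from MCELFSymbolFlags.h. A self-contained sketch of that packing, with illustrative shift values rather than the real ones:

    #include <cassert>
    #include <cstdint>

    // Hypothetical field positions; MCELFSymbolFlags.h defines the actual ones.
    enum { STB_Shift = 8, STT_Shift = 4 };
    enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 };
    enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2 };

    // Clear the 4-bit field, then OR in the new value -- the same shape as
    // MCELF::SetBinding and MCELF::SetType.
    static void setField(uint32_t &Flags, unsigned Shift, unsigned Value) {
      Flags = (Flags & ~(0xfu << Shift)) | (Value << Shift);
    }

    static unsigned getField(uint32_t Flags, unsigned Shift) {
      return (Flags >> Shift) & 0xf;
    }

    int main() {
      uint32_t Flags = 0;
      setField(Flags, STB_Shift, STB_GLOBAL);
      setField(Flags, STT_Shift, STT_FUNC);
      assert(getField(Flags, STB_Shift) == STB_GLOBAL);
      assert(getField(Flags, STT_Shift) == STT_FUNC);
      return 0;
    }

Hoisting these helpers into MCELF lets the streamer and the ELF object writer share one definition of the masks instead of duplicating them.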
@@ -437,19 +291,22 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { switch (symRef.getKind()) { default: return; + case MCSymbolRefExpr::VK_GOTTPOFF: + case MCSymbolRefExpr::VK_INDNTPOFF: case MCSymbolRefExpr::VK_NTPOFF: case MCSymbolRefExpr::VK_GOTNTPOFF: case MCSymbolRefExpr::VK_TLSGD: + case MCSymbolRefExpr::VK_TLSLD: case MCSymbolRefExpr::VK_TLSLDM: case MCSymbolRefExpr::VK_TPOFF: case MCSymbolRefExpr::VK_DTPOFF: - case MCSymbolRefExpr::VK_GOTTPOFF: - case MCSymbolRefExpr::VK_TLSLD: case MCSymbolRefExpr::VK_ARM_TLSGD: + case MCSymbolRefExpr::VK_ARM_TPOFF: + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: break; } MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol()); - SetType(SD, ELF::STT_TLS); + MCELF::SetType(SD, ELF::STT_TLS); break; } @@ -489,7 +346,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { void MCELFStreamer::Finish() { if (getNumFrameInfos()) - MCDwarfFrameEmitter::Emit(*this); + MCDwarfFrameEmitter::Emit(*this, true); for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(), e = LocalCommons.end(); diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h new file mode 100644 index 0000000..db34d58 --- /dev/null +++ b/lib/MC/MCELFStreamer.h @@ -0,0 +1,274 @@ +//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ELF .o object files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFSTREAMER_H +#define LLVM_MC_MCELFSTREAMER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + +class MCELFStreamer : public MCObjectStreamer { +public: + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + MCAssembler *Assembler) + : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {} + + + ~MCELFStreamer() {} + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections(); + virtual void ChangeSection(const MCSection *Section); + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitThumbFunc(MCSymbol *Func); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void 
EndCOFFSymbolDef() { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setSize(Value); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitFileDirective(StringRef Filename); + + virtual void Finish(); + +private: + virtual void EmitInstToFragment(const MCInst &Inst); + virtual void EmitInstToData(const MCInst &Inst); + + void fixSymbolsInTLSFixups(const MCExpr *expr); + + struct LocalCommon { + MCSymbolData *SD; + uint64_t Size; + unsigned ByteAlignment; + }; + std::vector<LocalCommon> LocalCommons; + + SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet; + /// @} + void SetSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind) { + SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind)); + } + + void SetSectionData() { + SetSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getDataRel()); + EmitCodeAlignment(4, 0); + } + void SetSectionText() { + SetSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, SectionKind::getText()); + EmitCodeAlignment(4, 0); + } + void SetSectionBss() { + SetSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, SectionKind::getBSS()); + EmitCodeAlignment(4, 0); + } +}; + +} // end llvm namespace + +#endif
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 54d3743..3a674d7 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -310,6 +310,11 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, if (AD.getFragment() == BD.getFragment()) { Addend += (AD.getOffset() - BD.getOffset()); + // Pointers to Thumb symbols need to have their low-bit set to allow + // for interworking. + if (Asm->isThumbFunc(&SA)) + Addend |= 1; + // Clear the symbol expr pointers to indicate we have folded these // operands. A = B = 0; @@ -384,7 +389,7 @@ static bool EvaluateSymbolicAdd(const MCAssembler *Asm, // (LHS_A - RHS_B), // (RHS_A - LHS_B), // (RHS_A - RHS_B). - // Since we are attempting to be as aggresive as possible about folding, we + // Since we are attempting to be as aggressive as possible about folding, we // attempt to evaluate each possible alternative. AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B, Result_Cst); @@ -554,3 +559,45 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, assert(0 && "Invalid assembly expression kind!"); return false; } + +const MCSection *MCExpr::FindAssociatedSection() const { + switch (getKind()) { + case Target: + // We never look through target specific expressions. + return cast<MCTargetExpr>(this)->FindAssociatedSection(); + + case Constant: + return MCSymbol::AbsolutePseudoSection; + + case SymbolRef: { + const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this); + const MCSymbol &Sym = SRE->getSymbol(); + + if (Sym.isDefined()) + return &Sym.getSection(); + + return 0; + } + + case Unary: + return cast<MCUnaryExpr>(this)->getSubExpr()->FindAssociatedSection(); + + case Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(this); + const MCSection *LHS_S = BE->getLHS()->FindAssociatedSection(); + const MCSection *RHS_S = BE->getRHS()->FindAssociatedSection(); + + // If either section is absolute, return the other. + if (LHS_S == MCSymbol::AbsolutePseudoSection) + return RHS_S; + if (RHS_S == MCSymbol::AbsolutePseudoSection) + return LHS_S; + + // Otherwise, return the first non-null section. return LHS_S ?
LHS_S : RHS_S; + } + } + + assert(0 && "Invalid assembly expression kind!"); + return 0; +} diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index 92a7154..212b85e 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -19,3 +19,8 @@ MCInstPrinter::~MCInstPrinter() { StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { return ""; } + +StringRef MCInstPrinter::getRegName(unsigned RegNo) const { + assert(0 && "Target should implement this"); + return ""; +} diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp index 012c7f6..46ea9b8 100644 --- a/lib/MC/MCLoggingStreamer.cpp +++ b/lib/MC/MCLoggingStreamer.cpp @@ -154,21 +154,19 @@ public: } virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace){ + unsigned AddrSpace){ LogCall("EmitValue"); - return Child->EmitValueImpl(Value, Size, isPCRel, AddrSpace); + return Child->EmitValueImpl(Value, Size, AddrSpace); } - virtual void EmitULEB128Value(const MCExpr *Value, - unsigned AddrSpace = 0) { + virtual void EmitULEB128Value(const MCExpr *Value) { LogCall("EmitULEB128Value"); - return Child->EmitULEB128Value(Value, AddrSpace); + return Child->EmitULEB128Value(Value); } - virtual void EmitSLEB128Value(const MCExpr *Value, - unsigned AddrSpace = 0) { + virtual void EmitSLEB128Value(const MCExpr *Value) { LogCall("EmitSLEB128Value"); - return Child->EmitSLEB128Value(Value, AddrSpace); + return Child->EmitSLEB128Value(Value); } virtual void EmitGPRel32Value(const MCExpr *Value) { @@ -215,13 +213,14 @@ public: virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator) { + unsigned Isa, unsigned Discriminator, + StringRef FileName) { LogCall("EmitDwarfLocDirective", "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) + " Column:" + Twine(Column) + " Flags:" + Twine(Flags) + " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator)); return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags, - Isa, Discriminator); + Isa, Discriminator, FileName); } virtual void EmitInstruction(const MCInst &Inst) { diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index d1f9f5c..3da5b49 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -44,6 +44,8 @@ public: virtual void InitSections(); virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); virtual void EmitThumbFunc(MCSymbol *Func); virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); @@ -101,6 +103,18 @@ void MCMachOStreamer::InitSections() { } +void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol) { + MCSymbolData &SD = + getAssembler().getOrCreateSymbolData(*Symbol); + if (SD.isExternal()) + EmitSymbolAttribute(EHSymbol, MCSA_Global); + if (SD.getFlags() & SF_WeakDefinition) + EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition); + if (SD.isPrivateExtern()) + EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern); +} + void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); @@ -363,6 +377,9 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { } void MCMachOStreamer::Finish() { + if (getNumFrameInfos()) + MCDwarfFrameEmitter::Emit(*this, true); + // We have to set the fragment atom associations so we can relax properly for // Mach-O. 
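MCLoggingStreamer, updated above, is a plain decorator: every virtual method logs the call and forwards to a wrapped child streamer, which is why each MCStreamer signature change (EmitValueImpl losing isPCRel, EmitDwarfLocDirective gaining FileName) has to be mirrored here. A minimal sketch of the shape, with a hypothetical one-method interface standing in for MCStreamer:

    #include <cstdio>
    #include <memory>
    #include <utility>

    struct Streamer {
      virtual ~Streamer() = default;
      virtual void EmitBytes(const char *Data, unsigned AddrSpace) = 0;
    };

    struct ObjectStreamer : Streamer {
      void EmitBytes(const char *, unsigned) override { /* real work here */ }
    };

    // Log the call, then delegate -- the MCLoggingStreamer pattern.
    struct LoggingStreamer : Streamer {
      explicit LoggingStreamer(std::unique_ptr<Streamer> C)
          : Child(std::move(C)) {}
      void EmitBytes(const char *Data, unsigned AddrSpace) override {
        std::fprintf(stderr, "EmitBytes(AddrSpace=%u)\n", AddrSpace);
        Child->EmitBytes(Data, AddrSpace);
      }
      std::unique_ptr<Streamer> Child;
    };

    int main() {
      LoggingStreamer S(std::make_unique<ObjectStreamer>());
      S.EmitBytes("abc", 0);
      return 0;
    }

The downside of the pattern, visible in this hunk, is that every interface change must be applied in two places or the logging wrapper silently stops overriding the right method.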
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 08ddf01..f38b822 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -67,11 +67,9 @@ namespace { virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace) {} - virtual void EmitULEB128Value(const MCExpr *Value, - unsigned AddrSpace = 0) {} - virtual void EmitSLEB128Value(const MCExpr *Value, - unsigned AddrSpace = 0) {} + unsigned AddrSpace) {} + virtual void EmitULEB128Value(const MCExpr *Value) {} + virtual void EmitSLEB128Value(const MCExpr *Value) {} virtual void EmitGPRel32Value(const MCExpr *Value) {} virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, @@ -89,7 +87,8 @@ namespace { } virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator) {} + unsigned Isa, unsigned Discriminator, + StringRef FileName) {} virtual void EmitInstruction(const MCInst &Inst) {} virtual void Finish() {} diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index e67d9b0..0f349d0 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -31,6 +31,13 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, { } +MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter_, + MCAssembler *_Assembler) + : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0) +{ +} + MCObjectStreamer::~MCObjectStreamer() { delete &Assembler->getBackend(); delete &Assembler->getEmitter(); @@ -83,7 +90,7 @@ const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) { } void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace) { + unsigned AddrSpace) { assert(AddrSpace == 0 && "Address space must be 0!"); MCDataFragment *DF = getOrCreateDataFragment(); @@ -95,15 +102,12 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, } DF->addFixup(MCFixup::Create(DF->getContents().size(), Value, - MCFixup::getKindForSize(Size, isPCRel))); + MCFixup::getKindForSize(Size, false))); DF->getContents().resize(DF->getContents().size() + Size, 0); } void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { - assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - - Symbol->setSection(*getCurrentSection()); + MCStreamer::EmitLabel(Symbol); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); @@ -117,23 +121,23 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { SD.setOffset(F->getContents().size()); } -void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value, - unsigned AddrSpace) { +void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) { - EmitULEB128IntValue(IntValue, AddrSpace); + EmitULEB128IntValue(IntValue); return; } + Value = ForceExpAbs(this, getContext(), Value); new MCLEBFragment(*Value, false, getCurrentSectionData()); } -void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value, - unsigned AddrSpace) { +void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) { - EmitSLEB128IntValue(IntValue, AddrSpace); + EmitSLEB128IntValue(IntValue); 
return; } + Value = ForceExpAbs(this, getContext(), Value); new MCLEBFragment(*Value, true, getCurrentSectionData()); } @@ -184,30 +188,11 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) { MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); - raw_svector_ostream VecOS(IF->getCode()); + SmallString<128> Code; + raw_svector_ostream VecOS(Code); getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups()); -} - -static const MCExpr *BuildSymbolDiff(MCContext &Context, - const MCSymbol *A, const MCSymbol *B) { - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - const MCExpr *ARef = - MCSymbolRefExpr::Create(A, Variant, Context); - const MCExpr *BRef = - MCSymbolRefExpr::Create(B, Variant, Context); - const MCExpr *AddrDelta = - MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context); - return AddrDelta; -} - -static const MCExpr *ForceExpAbs(MCObjectStreamer *Streamer, - MCContext &Context, const MCExpr* Expr) { - if (Context.getAsmInfo().hasAggressiveSymbolFolding()) - return Expr; - - MCSymbol *ABS = Context.CreateTempSymbol(); - Streamer->EmitAssignment(ABS, Expr); - return MCSymbolRefExpr::Create(ABS, Context); + VecOS.flush(); + IF->getCode().append(Code.begin(), Code.end()); } void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 89374d0..a3d3a49 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -213,13 +213,13 @@ AsmToken AsmLexer::LexDigit() { // Requires at least one binary digit. if (CurPtr == NumStart) - return ReturnError(TokStart, "Invalid binary number"); + return ReturnError(TokStart, "invalid binary number"); StringRef Result(TokStart, CurPtr - TokStart); long long Value; if (Result.substr(2).getAsInteger(2, Value)) - return ReturnError(TokStart, "Invalid binary number"); + return ReturnError(TokStart, "invalid binary number"); // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. @@ -236,11 +236,11 @@ AsmToken AsmLexer::LexDigit() { // Requires at least one hex digit. if (CurPtr == NumStart) - return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + return ReturnError(CurPtr-2, "invalid hexadecimal number"); unsigned long long Result; if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) - return ReturnError(TokStart, "Invalid hexadecimal number"); + return ReturnError(TokStart, "invalid hexadecimal number"); // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. @@ -251,13 +251,13 @@ AsmToken AsmLexer::LexDigit() { } // Must be an octal number, it starts with 0. - while (*CurPtr >= '0' && *CurPtr <= '7') + while (*CurPtr >= '0' && *CurPtr <= '9') ++CurPtr; StringRef Result(TokStart, CurPtr - TokStart); long long Value; if (Result.getAsInteger(8, Value)) - return ReturnError(TokStart, "Invalid octal number"); + return ReturnError(TokStart, "invalid octal number"); // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. @@ -324,8 +324,8 @@ AsmToken AsmLexer::LexQuote() { StringRef AsmLexer::LexUntilEndOfStatement() { TokStart = CurPtr; - while (!isAtStartOfComment(*CurPtr) && // Start of line comment. - *CurPtr != ';' && // End of statement marker. + while (!isAtStartOfComment(*CurPtr) && // Start of line comment. 
+ !isAtStatementSeparator(CurPtr) && // End of statement marker. *CurPtr != '\n' && *CurPtr != '\r' && (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { @@ -339,6 +339,11 @@ bool AsmLexer::isAtStartOfComment(char Char) { return Char == *MAI.getCommentString(); } +bool AsmLexer::isAtStatementSeparator(const char *Ptr) { + return strncmp(Ptr, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0; +} + AsmToken AsmLexer::LexToken() { TokStart = CurPtr; // This always consumes at least one character. @@ -346,6 +351,11 @@ AsmToken AsmLexer::LexToken() { if (isAtStartOfComment(CurChar)) return LexLineComment(); + if (isAtStatementSeparator(TokStart)) { + CurPtr += strlen(MAI.getSeparatorString()) - 1; + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, strlen(MAI.getSeparatorString()))); + } switch (CurChar) { default: @@ -362,8 +372,8 @@ AsmToken AsmLexer::LexToken() { // Ignore whitespace. return LexToken(); case '\n': // FALL THROUGH. - case '\r': // FALL THROUGH. - case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + case '\r': + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index a84917f..d8fd27d 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -251,10 +251,14 @@ public: ".cfi_def_cfa"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>( ".cfi_def_cfa_offset"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset>( + ".cfi_adjust_cfa_offset"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>( ".cfi_def_cfa_register"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>( ".cfi_offset"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIRelOffset>( + ".cfi_rel_offset"); AddDirectiveHandler< &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality"); AddDirectiveHandler< @@ -263,6 +267,8 @@ public: &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state"); AddDirectiveHandler< &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state"); + AddDirectiveHandler< + &GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value"); // Macro directives. 
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>( @@ -287,11 +293,14 @@ public: bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveCFIAdjustCfaOffset(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveCFIRelOffset(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc); @@ -517,6 +526,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { switch (Lexer.getKind()) { default: return TokError("unknown token in expression"); + // If we have an error assume that we've already handled it. + case AsmToken::Error: + return true; case AsmToken::Exclaim: Lex(); // Eat the operator. if (ParsePrimaryExpr(Res, EndLoc)) @@ -880,6 +892,7 @@ bool AsmParser::ParseStatement() { EatToEndOfStatement(); return false; } + // Allow an integer followed by a ':' as a directional local label. if (Lexer.is(AsmToken::Integer)) { LocalLabelVal = getTok().getIntVal(); @@ -896,13 +909,19 @@ bool AsmParser::ParseStatement() { return TokError("unexpected token at start of statement"); } } - } - else if (ParseIdentifier(IDVal)) { + + } else if (Lexer.is(AsmToken::Dot)) { + // Treat '.' as a valid identifier in this context. + Lex(); + IDVal = "."; + + } else if (ParseIdentifier(IDVal)) { if (!TheCondState.Ignore) return TokError("unexpected token at start of statement"); IDVal = ""; } + // Handle conditional assembly here before checking for skipping. We // have to do this so that .endif isn't skipped in a ".if 0" block for // example. @@ -935,6 +954,10 @@ bool AsmParser::ParseStatement() { // identifier ':' -> Label. Lex(); + // Diagnose attempt to use '.' as a label. + if (IDVal == ".") + return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label"); + // Diagnose attempt to use a variable as a label. // // FIXME: Diagnostics. Note the location of the definition as a label. @@ -978,7 +1001,7 @@ bool AsmParser::ParseStatement() { return HandleMacroEntry(IDVal, IDLoc, M); // Otherwise, we have a normal instruction or directive. - if (IDVal[0] == '.') { + if (IDVal[0] == '.' && IDVal != ".") { // Assembler features if (IDVal == ".set" || IDVal == ".equ") return ParseDirectiveSet(IDVal, true); @@ -1041,7 +1064,7 @@ bool AsmParser::ParseStatement() { if (IDVal == ".fill") return ParseDirectiveFill(); - if (IDVal == ".space") + if (IDVal == ".space" || IDVal == ".skip") return ParseDirectiveSpace(); if (IDVal == ".zero") return ParseDirectiveZero(); @@ -1306,6 +1329,12 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) { if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in assignment"); + // Error on assignment to '.'. + if (Name == ".") { + return Error(EqualLoc, ("assignment to pseudo-symbol '.' is unsupported " + "(use '.space' or '.org').")); + } + // Eat the end of statement marker.
Lex(); @@ -1319,7 +1348,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) { // FIXME: Diagnose assignment to protected identifier (e.g., register name). if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable()) ; // Allow redefinitions of undefined symbols only used in directives. - else if (!Sym->isUndefined() && (!Sym->isAbsolute() || !allow_redef)) + else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef)) return Error(EqualLoc, "redefinition of '" + Name + "'"); else if (!Sym->isVariable()) return Error(EqualLoc, "invalid assignment to '" + Name + "'"); @@ -1535,13 +1564,21 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { Lex(); if (getLexer().isNot(AsmToken::Integer) && - getLexer().isNot(AsmToken::Real)) + getLexer().isNot(AsmToken::Real) && + getLexer().isNot(AsmToken::Identifier)) return TokError("unexpected token in directive"); // Convert to an APFloat. APFloat Value(Semantics); - if (Value.convertFromString(getTok().getString(), - APFloat::rmNearestTiesToEven) == + StringRef IDVal = getTok().getString(); + if (getLexer().is(AsmToken::Identifier)) { + if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf")) + Value = APFloat::getInf(Semantics); + else if (!IDVal.compare_lower("nan")) + Value = APFloat::getNaN(Semantics, false, ~0); + else + return TokError("invalid floating point literal"); + } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) == APFloat::opInvalidOp) return TokError("invalid floating point literal"); if (IsNeg) @@ -2216,7 +2253,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { } getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags, - Isa, Discriminator); + Isa, Discriminator, StringRef()); return false; } @@ -2232,13 +2269,15 @@ bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive, /// ::= .cfi_startproc bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc) { - return getStreamer().EmitCFIStartProc(); + getStreamer().EmitCFIStartProc(); + return false; } /// ParseDirectiveCFIEndProc /// ::= .cfi_endproc bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) { - return getStreamer().EmitCFIEndProc(); + getStreamer().EmitCFIEndProc(); + return false; } /// ParseRegisterOrRegisterNumber - parse register name or number. 
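The ParseDirectiveRealValue change above lets .single/.double accept identifier spellings of special values: 'inf' and 'infinity' map to APFloat::getInf, 'nan' to APFloat::getNaN, matched case-insensitively via compare_lower. A standalone sketch of equivalent matching using only the standard library (not the APFloat path the parser actually takes):

    #include <algorithm>
    #include <cassert>
    #include <cctype>
    #include <cmath>
    #include <cstdlib>
    #include <limits>
    #include <string>

    // Case-insensitive equality, standing in for StringRef::compare_lower.
    static bool equalsLower(std::string S, const char *Lit) {
      std::transform(S.begin(), S.end(), S.begin(),
                     [](unsigned char C) { return std::tolower(C); });
      return S == Lit;
    }

    // Returns true and sets Out if Tok is a recognized floating literal.
    static bool parseFloatLiteral(const std::string &Tok, double &Out) {
      if (equalsLower(Tok, "inf") || equalsLower(Tok, "infinity")) {
        Out = std::numeric_limits<double>::infinity();
        return true;
      }
      if (equalsLower(Tok, "nan")) {
        Out = std::numeric_limits<double>::quiet_NaN();
        return true;
      }
      char *End = nullptr;
      Out = std::strtod(Tok.c_str(), &End);
      return End != Tok.c_str() && *End == '\0';
    }

    int main() {
      double V;
      assert(parseFloatLiteral("Infinity", V) && std::isinf(V));
      return 0;
    }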
@@ -2273,7 +2312,8 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef, if (getParser().ParseAbsoluteExpression(Offset)) return true; - return getStreamer().EmitCFIDefCfa(Register, Offset); + getStreamer().EmitCFIDefCfa(Register, Offset); + return false; } /// ParseDirectiveCFIDefCfaOffset @@ -2284,7 +2324,20 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef, if (getParser().ParseAbsoluteExpression(Offset)) return true; - return getStreamer().EmitCFIDefCfaOffset(Offset); + getStreamer().EmitCFIDefCfaOffset(Offset); + return false; +} + +/// ParseDirectiveCFIAdjustCfaOffset +/// ::= .cfi_adjust_cfa_offset adjustment +bool GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset(StringRef, + SMLoc DirectiveLoc) { + int64_t Adjustment = 0; + if (getParser().ParseAbsoluteExpression(Adjustment)) + return true; + + getStreamer().EmitCFIAdjustCfaOffset(Adjustment); + return false; } /// ParseDirectiveCFIDefCfaRegister @@ -2295,11 +2348,12 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef, if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; - return getStreamer().EmitCFIDefCfaRegister(Register); + getStreamer().EmitCFIDefCfaRegister(Register); + return false; } /// ParseDirectiveCFIOffset -/// ::= .cfi_off register, offset +/// ::= .cfi_offset register, offset bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) { int64_t Register = 0; int64_t Offset = 0; @@ -2314,7 +2368,29 @@ bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) { if (getParser().ParseAbsoluteExpression(Offset)) return true; - return getStreamer().EmitCFIOffset(Register, Offset); + getStreamer().EmitCFIOffset(Register, Offset); + return false; +} + +/// ParseDirectiveCFIRelOffset +/// ::= .cfi_rel_offset register, offset +bool GenericAsmParser::ParseDirectiveCFIRelOffset(StringRef, + SMLoc DirectiveLoc) { + int64_t Register = 0; + + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Offset = 0; + if (getParser().ParseAbsoluteExpression(Offset)) + return true; + + getStreamer().EmitCFIRelOffset(Register, Offset); + return false; } static bool isValidEncoding(int64_t Encoding) { @@ -2364,25 +2440,42 @@ bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal, MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); if (IDVal == ".cfi_personality") - return getStreamer().EmitCFIPersonality(Sym, Encoding); + getStreamer().EmitCFIPersonality(Sym, Encoding); else { assert(IDVal == ".cfi_lsda"); - return getStreamer().EmitCFILsda(Sym, Encoding); + getStreamer().EmitCFILsda(Sym, Encoding); } + return false; } /// ParseDirectiveCFIRememberState /// ::= .cfi_remember_state bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal, SMLoc DirectiveLoc) { - return getStreamer().EmitCFIRememberState(); + getStreamer().EmitCFIRememberState(); + return false; } /// ParseDirectiveCFIRestoreState /// ::= .cfi_restore_state bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal, SMLoc DirectiveLoc) { - return getStreamer().EmitCFIRestoreState(); + getStreamer().EmitCFIRestoreState(); + return false; +} + +/// ParseDirectiveCFISameValue +/// ::= .cfi_same_value register +bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal, + SMLoc DirectiveLoc) { + int64_t Register = 0; + + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; + +
getStreamer().EmitCFISameValue(Register); + + return false; } /// ParseDirectiveMacrosOnOff diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 44f2345..3c092cd 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -100,6 +100,8 @@ public: AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv"); + + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident"); } bool ParseDirectiveDesc(StringRef, SMLoc); @@ -277,6 +279,11 @@ public: return ParseSectionSwitch("__DATA", "__thread_vars", MCSectionMachO::S_THREAD_LOCAL_VARIABLES); } + bool ParseSectionDirectiveIdent(StringRef, SMLoc) { + // Darwin silently ignores the .ident directive. + getParser().EatToEndOfStatement(); + return false; + } bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) { return ParseSectionSwitch("__DATA", "__thread_init", MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); @@ -427,10 +434,12 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) { StringRef Segment, Section; - unsigned TAA, StubSize; + unsigned StubSize; + unsigned TAA; + bool TAAParsed; std::string ErrorStr = MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, - TAA, StubSize); + TAA, TAAParsed, StubSize); if (!ErrorStr.empty()) return Error(Loc, ErrorStr.c_str()); diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index d32aea1..dfd77c3 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -39,8 +39,28 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, return; } - OS << "\t.section\t" << getSectionName(); - + StringRef name = getSectionName(); + if (name.find_first_not_of("0123456789_." + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) { + OS << "\t.section\t" << name; + } else { + OS << "\t.section\t\""; + for (const char *b = name.begin(), *e = name.end(); b < e; ++b) { + if (*b == '"') // Unquoted " + OS << "\\\""; + else if (*b != '\\') // Neither " nor backslash + OS << *b; + else if (b + 1 == e) // Trailing backslash + OS << "\\\\"; + else { + OS << b[0] << b[1]; // Quoted character + ++b; + } + } + OS << '"'; + } + // Handle the weird solaris syntax if desired. if (MAI.usesSunStyleELFSectionSwitchSyntax() && !(Flags & ELF::SHF_MERGE)) { diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index 577e93a..e771556 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -180,7 +180,9 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. StringRef &Segment, // Out. StringRef &Section, // Out. unsigned &TAA, // Out. + bool &TAAParsed, // Out. unsigned &StubSize) { // Out. + TAAParsed = false; // Find the first comma. std::pair<StringRef, StringRef> Comma = Spec.split(','); @@ -211,6 +213,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. "between 1 and 16 characters"; // If there is no comma after the section, we're done. + TAA = 0; StubSize = 0; if (Comma.second.empty()) return ""; @@ -235,6 +238,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. // Remember the TypeID. TAA = TypeID; + TAAParsed = true; // If we have no comma after the section type, there are no attributes.
if (Comma.second.empty()) { diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 4b302c8..fa245b1 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -12,6 +12,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" @@ -27,6 +28,29 @@ MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) { MCStreamer::~MCStreamer() { } +const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context, + const MCSymbol *A, + const MCSymbol *B) { + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *ARef = + MCSymbolRefExpr::Create(A, Variant, Context); + const MCExpr *BRef = + MCSymbolRefExpr::Create(B, Variant, Context); + const MCExpr *AddrDelta = + MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context); + return AddrDelta; +} + +const MCExpr *MCStreamer::ForceExpAbs(MCStreamer *Streamer, + MCContext &Context, const MCExpr* Expr) { + if (Context.getAsmInfo().hasAggressiveSymbolFolding()) + return Expr; + + MCSymbol *ABS = Context.CreateTempSymbol(); + Streamer->EmitAssignment(ABS, Expr); + return MCSymbolRefExpr::Create(ABS, Context); +} + raw_ostream &MCStreamer::GetCommentOS() { // By default, discard comments. return nulls(); @@ -90,30 +114,15 @@ void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size, void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace) { - EmitValueImpl(Value, Size, false, AddrSpace); -} - -void MCStreamer::EmitPCRelValue(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { - EmitValueImpl(Value, Size, true, AddrSpace); + EmitValueImpl(Value, Size, AddrSpace); } void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, - bool isPCRel, unsigned AddrSpace) { - EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, isPCRel, + unsigned AddrSpace) { + EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, AddrSpace); } -void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, - unsigned AddrSpace) { - EmitSymbolValue(Sym, Size, false, AddrSpace); -} - -void MCStreamer::EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size, - unsigned AddrSpace) { - EmitSymbolValue(Sym, Size, true, AddrSpace); -} - void MCStreamer::EmitGPRel32Value(const MCExpr *Value) { report_fatal_error("unsupported directive in streamer"); } @@ -135,7 +144,8 @@ bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo, void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, - unsigned Discriminator) { + unsigned Discriminator, + StringRef FileName) { getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa, Discriminator); } @@ -152,28 +162,39 @@ void MCStreamer::EnsureValidFrame() { report_fatal_error("No open frame"); } -bool MCStreamer::EmitCFIStartProc() { +void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol) { +} + +void MCStreamer::EmitLabel(MCSymbol *Symbol) { + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection()); + + StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix(); + if (!Symbol->getName().startswith(Prefix)) + LastNonPrivate = Symbol; +} + +void MCStreamer::EmitCFIStartProc() { MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); - if 
(CurFrame && !CurFrame->End) { + if (CurFrame && !CurFrame->End) report_fatal_error("Starting a frame before finishing the previous one!"); - return true; - } MCDwarfFrameInfo Frame; Frame.Begin = getContext().CreateTempSymbol(); + Frame.Function = LastNonPrivate; EmitLabel(Frame.Begin); FrameInfos.push_back(Frame); - return false; } -bool MCStreamer::EmitCFIEndProc() { +void MCStreamer::EmitCFIEndProc() { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); CurFrame->End = getContext().CreateTempSymbol(); EmitLabel(CurFrame->End); - return false; } -bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { +void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); @@ -182,10 +203,9 @@ bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { MachineLocation Source(Register, -Offset); MCCFIInstruction Instruction(Label, Dest, Source); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) { +void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); @@ -194,10 +214,20 @@ bool MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) { MachineLocation Source(MachineLocation::VirtualFP, -Offset); MCCFIInstruction Instruction(Label, Dest, Source); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) { +void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MachineLocation Dest(MachineLocation::VirtualFP); + MachineLocation Source(MachineLocation::VirtualFP, Adjustment); + MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source); + CurFrame->Instructions.push_back(Instruction); +} + +void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); @@ -206,10 +236,9 @@ bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) { MachineLocation Source(MachineLocation::VirtualFP); MCCFIInstruction Instruction(Label, Dest, Source); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { +void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); @@ -218,37 +247,44 @@ bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { MachineLocation Source(Register, Offset); MCCFIInstruction Instruction(Label, Dest, Source); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIPersonality(const MCSymbol *Sym, +void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MachineLocation Dest(Register, Offset); + MachineLocation Source(Register, Offset); + MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source); + CurFrame->Instructions.push_back(Instruction); +} + +void 
MCStreamer::EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); CurFrame->Personality = Sym; CurFrame->PersonalityEncoding = Encoding; - return false; } -bool MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { +void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); CurFrame->Lsda = Sym; CurFrame->LsdaEncoding = Encoding; - return false; } -bool MCStreamer::EmitCFIRememberState() { +void MCStreamer::EmitCFIRememberState() { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); EmitLabel(Label); MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIRestoreState() { +void MCStreamer::EmitCFIRestoreState() { // FIXME: Error if there is no matching cfi_remember_state. EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); @@ -256,7 +292,55 @@ bool MCStreamer::EmitCFIRestoreState() { EmitLabel(Label); MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label); CurFrame->Instructions.push_back(Instruction); - return false; +} + +void MCStreamer::EmitCFISameValue(int64_t Register) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MCCFIInstruction Instruction(MCCFIInstruction::SameValue, Label, Register); + CurFrame->Instructions.push_back(Instruction); +} + +void MCStreamer::EmitFnStart() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitFnEnd() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitCantUnwind() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitHandlerData() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitPersonality(const MCSymbol *Personality) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitPad(int64_t Offset) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) { + errs() << "Not implemented yet\n"; + abort(); } /// EmitRawText - If this file is backed by an assembly streamer, this dumps diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 1c71f26..c2fad16 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -58,9 +58,13 @@ void MCSymbol::setVariableValue(const MCExpr *Value) { "Invalid redefinition!"); this->Value = Value; - // Mark the variable as absolute as appropriate. - if (isa<MCConstantExpr>(Value)) - setAbsolute(); + // Variables should always be marked as in the same "section" as the value. 
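In other words, `.set sym, expr` no longer treats a constant value as absolute; the symbol now tracks whatever section its value is anchored in, falling back to undefined when there is none. A much-simplified sketch of what FindAssociatedSection boils down to for common shapes (an approximation for orientation, not the actual walker):

// .set alias, defined_sym  -> defined_sym's section
// .set delta, a - b        -> a's section (the subtrahend cancels at use)
// .set answer, 42          -> no section found in this sketch
static const MCSection *findAssociatedSectionSketch(const MCExpr *E) {
  if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(E)) {
    const MCSymbol &S = SRE->getSymbol();
    return S.isDefined() ? &S.getSection() : 0;
  }
  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(E))
    return findAssociatedSectionSketch(BE->getLHS());
  return 0; // constants and other leaves carry no section here
}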
+ const MCSection *Section = Value->FindAssociatedSection(); + if (Section) { + setSection(*Section); + } else { + setUndefined(); + } } void MCSymbol::print(raw_ostream &OS) const { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 8af07c7..f049b1c 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -121,6 +121,33 @@ private: } uint64_t getSymbolAddress(const MCSymbolData* SD, const MCAsmLayout &Layout) const { + const MCSymbol &S = SD->getSymbol(); + + // If this is a variable, then recursively evaluate now. + if (S.isVariable()) { + MCValue Target; + if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout)) + report_fatal_error("unable to evaluate offset for variable '" + + S.getName() + "'"); + + // Verify that any used symbols are defined. + if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymA()->getSymbol().getName() + "'"); + if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymB()->getSymbol().getName() + "'"); + + uint64_t Address = Target.getConstant(); + if (Target.getSymA()) + Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( + Target.getSymA()->getSymbol()), Layout); + if (Target.getSymB()) + Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( + Target.getSymB()->getSymbol()), Layout); + return Address; + } + return getSectionAddress(SD->getFragment()->getParent()) + Layout.getSymbolOffset(SD); } @@ -274,8 +301,8 @@ public: if (is64Bit()) Write32(0); // reserved3 - assert(OS.tell() - Start == is64Bit() ? macho::Section64Size : - macho::Section32Size); + assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size : + macho::Section32Size)); } void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, @@ -440,7 +467,7 @@ public: // Compensate for the relocation offset, Darwin x86_64 relocations only // have the addend and appear to have attempted to define it to be the // actual expression addend without the PCrel bias. However, instructions - // with data following the relocation are not accomodated for (see comment + // with data following the relocation are not accommodated for (see comment // below regarding SIGNED{1,2,4}), so it isn't exactly that either. Value += 1LL << Log2Size; } @@ -541,7 +568,7 @@ public: } // x86_64 almost always uses external relocations, except when there is no - // symbol to use as a base address (a local symbol with no preceeding + // symbol to use as a base address (a local symbol with no preceding // non-local symbol). if (Base) { Index = Base->getIndex(); @@ -550,7 +577,7 @@ public: // Add the local offset, if needed. if (Base != &SD) Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); - } else if (Symbol->isInSection()) { + } else if (Symbol->isInSection() && !Symbol->isVariable()) { // The index is the section ordinal (1-based). Index = SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; @@ -821,12 +848,12 @@ public: // 1 - :upper16: for movt instructions // high bit of r_length: // 0 - arm instructions - // 1 - thumb instructions + // 1 - thumb instructions // the other half of the relocated expression is in the following pair // relocation entry in the the low 16 bits of r_address field. 
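For orientation, the :lower16:/:upper16: pair described above splits one 32-bit value between a movw/movt instruction pair, and each half gets its own scattered relocation entry; with an illustrative value:

uint32_t Addr = 0x12345678;    // hypothetical final address
uint16_t Lo16 = Addr & 0xffff; // 0x5678: movw / :lower16:, MovtBit == 0
uint16_t Hi16 = Addr >> 16;    // 0x1234: movt / :upper16:, MovtBit == 1

The switch that follows records exactly these MovtBit/ThumbBit choices per fixup kind.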
unsigned ThumbBit = 0; unsigned MovtBit = 0; - switch (Fixup.getKind()) { + switch ((unsigned)Fixup.getKind()) { default: break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: @@ -952,15 +979,10 @@ public: RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); Log2Size = llvm::Log2_32(2); return true; - + case ARM::fixup_arm_thumb_bl: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch32Bit); - Log2Size = llvm::Log2_32(4); - return true; - case ARM::fixup_arm_thumb_blx: RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); - // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); return true; @@ -1033,17 +1055,17 @@ public: // FIXME! report_fatal_error("FIXME: relocations to absolute targets " "not yet implemented"); - } else if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, SectionAddress)) { + FixedValue = Res; + return; + } } - report_fatal_error("unsupported relocation of variable '" + - SD->getSymbol().getName() + "'"); - } else { // Check whether we need an external or internal relocation. if (doesSymbolRequireExternRelocation(SD)) { IsExtern = 1; @@ -1055,8 +1077,10 @@ public: FixedValue -= Layout.getSymbolOffset(SD); } else { // The index is the section ordinal (1-based). - Index = SD->getFragment()->getParent()->getOrdinal() + 1; - FixedValue += getSectionAddress(SD->getFragment()->getParent()); + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += getSectionAddress(&SymSD); } if (IsPCRel) FixedValue -= getSectionAddress(Fragment->getParent()); @@ -1132,17 +1156,17 @@ public: // FIXME: Currently, these are never generated (see code below). I cannot // find a case where they are actually emitted. Type = macho::RIT_Vanilla; - } else if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, SectionAddress)) { + FixedValue = Res; + return; + } } - report_fatal_error("unsupported relocation of variable '" + - SD->getSymbol().getName() + "'"); - } else { // Check whether we need an external or internal relocation. if (doesSymbolRequireExternRelocation(SD)) { IsExtern = 1; @@ -1154,8 +1178,10 @@ public: FixedValue -= Layout.getSymbolOffset(SD); } else { // The index is the section ordinal (1-based). 
- Index = SD->getFragment()->getParent()->getOrdinal() + 1; - FixedValue += getSectionAddress(SD->getFragment()->getParent()); + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += getSectionAddress(&SymSD); } if (IsPCRel) FixedValue -= getSectionAddress(Fragment->getParent()); diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 6ca5d37..101237a 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -647,22 +647,27 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, COFFSection *coff_section = SectionMap[&SectionData->getSection()]; COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()]; + const MCSymbolRefExpr *SymA = Target.getSymA(); + const MCSymbolRefExpr *SymB = Target.getSymB(); + const bool CrossSection = SymB && + &SymA->getSymbol().getSection() != &SymB->getSymbol().getSection(); if (Target.getSymB()) { - if (&Target.getSymA()->getSymbol().getSection() - != &Target.getSymB()->getSymbol().getSection()) { - llvm_unreachable("Symbol relative relocations are only allowed between " - "symbols in the same section"); - } const MCSymbol *B = &Target.getSymB()->getSymbol(); MCSymbolData &B_SD = Asm.getSymbolData(*B); - FixedValue = Layout.getSymbolOffset(&A_SD) - Layout.getSymbolOffset(&B_SD); + // Offset of the symbol in the section + int64_t a = Layout.getSymbolOffset(&B_SD); + + // Offset of the relocation in the section + int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + FixedValue = b - a; // In the case where we have SymbA and SymB, we just need to store the delta // between the two symbols. Update FixedValue to account for the delta, and // skip recording the relocation. - return; + if (!CrossSection) + return; } else { FixedValue = Target.getConstant(); } @@ -673,7 +678,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment); // Turn relocations for temporary symbols into section relocations.
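To put illustrative numbers on the b - a computation above: if the fixup lives at offset 0x40 of its section and the subtrahend symbol B at offset 0x10 of its section, the object file carries

int64_t FixedValue = 0x40 - 0x10; // b - a, hypothetical offsets

For the usual same-section A - B that delta is the complete answer and no relocation is recorded; when A and B live in different sections the stored delta is finished at link time by re-encoding the fixup as PC-relative (the forced FK_PCRel_4 further down). That is also why the CrossSection case joins the temporary-symbol path right below: the relocation must then point at a section symbol rather than an assembler temporary.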
- if (coff_symbol->MCData->getSymbol().isTemporary()) { + if (coff_symbol->MCData->getSymbol().isTemporary() || CrossSection) { Reloc.Symb = coff_symbol->Section->Symbol; FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment) + coff_symbol->MCData->getOffset(); @@ -684,7 +689,12 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, Reloc.Data.VirtualAddress += Fixup.getOffset(); - switch ((unsigned)Fixup.getKind()) { + unsigned FixupKind = Fixup.getKind(); + + if (CrossSection) + FixupKind = FK_PCRel_4; + + switch (FixupKind) { case FK_PCRel_4: case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 6a6814f..642a8ec 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -1,5 +1,7 @@ add_llvm_library(LLVMObject MachOObject.cpp + MachOObjectFile.cpp + Object.cpp ObjectFile.cpp COFFObjectFile.cpp ELFObjectFile.cpp diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index cfee82a..86bf44b 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -91,6 +91,7 @@ extern char coff_coff_section_layout_static_assert namespace { class COFFObjectFile : public ObjectFile { private: + uint64_t HeaderOff; const coff_file_header *Header; const coff_section *SectionTable; const coff_symbol *SymbolTable; @@ -185,11 +186,8 @@ char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const { return ret; uint32_t Characteristics = 0; - uint32_t PointerToRawData = 0; - const coff_section *Section = getSection(symb->SectionNumber); - if (Section) { + if (const coff_section *Section = getSection(symb->SectionNumber)) { Characteristics = Section->Characteristics; - PointerToRawData = Section->PointerToRawData; } switch (symb->SectionNumber) { @@ -256,7 +254,7 @@ StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const { // Check for string table entry. First byte is '/'. if (name[0] == '/') { uint32_t Offset; - name.getAsInteger(10, Offset); + name.substr(1).getAsInteger(10, Offset); return StringRef(getString(Offset)); } @@ -287,9 +285,20 @@ bool COFFObjectFile::isSectionText(DataRefImpl Sec) const { COFFObjectFile::COFFObjectFile(MemoryBuffer *Object) : ObjectFile(Object) { - Header = reinterpret_cast<const coff_file_header *>(base); + + HeaderOff = 0; + + if (base[0] == 0x4d && base[1] == 0x5a) { + // PE/COFF, seek through MS-DOS compatibility stub and 4-byte + // PE signature to find 'normal' COFF header. + HeaderOff += *reinterpret_cast<const ulittle32_t *>(base + 0x3c); + HeaderOff += 4; + } + + Header = reinterpret_cast<const coff_file_header *>(base + HeaderOff); SectionTable = reinterpret_cast<const coff_section *>( base + + HeaderOff + sizeof(coff_file_header) + Header->SizeOfOptionalHeader); SymbolTable = @@ -303,6 +312,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object) ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SymbolTable); return symbol_iterator(SymbolRef(ret, this)); } @@ -310,18 +320,21 @@ ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const { // The symbol table ends where the string table begins. 
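The 'MZ' sniff in the COFFObjectFile constructor above walks the standard PE/COFF preamble, which a bare COFF object lacks:

// offset 0x00  'M' 'Z'            MS-DOS compatibility stub signature
// offset 0x3c  e_lfanew           32-bit little-endian offset of "PE\0\0"
// e_lfanew     "PE\0\0"           4-byte PE signature
// e_lfanew+4   coff_file_header   what a plain object file starts with at 0

so HeaderOff ends up as e_lfanew + 4 for PE images and stays 0 for plain objects, and the section and symbol table pointers are rebased on it.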
DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(StringTable); return symbol_iterator(SymbolRef(ret, this)); } ObjectFile::section_iterator COFFObjectFile::begin_sections() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable); return section_iterator(SectionRef(ret, this)); } ObjectFile::section_iterator COFFObjectFile::end_sections() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections); return section_iterator(SectionRef(ret, this)); } diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 682be77..d2a2726 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -547,6 +547,7 @@ template<support::endianness target_endianness, bool is64Bits> ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> ::begin_sections() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff); return section_iterator(SectionRef(ret, this)); } @@ -555,6 +556,7 @@ template<support::endianness target_endianness, bool is64Bits> ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> ::end_sections() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff + (Header->e_shentsize * Header->e_shnum)); diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp index 5e64d63..9890feb 100644 --- a/lib/Object/MachOObject.cpp +++ b/lib/Object/MachOObject.cpp @@ -12,6 +12,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; using namespace llvm::object; @@ -340,3 +342,29 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, Index * sizeof(macho::Symbol64TableEntry)); ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } + +/* ** */ +// Object Dumping Facilities +void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; } +void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; } + +void MachOObject::printHeader(raw_ostream &O) const { + O << "('cputype', " << Header.CPUType << ")\n"; + O << "('cpusubtype', " << Header.CPUSubtype << ")\n"; + O << "('filetype', " << Header.FileType << ")\n"; + O << "('num_load_commands', " << Header.NumLoadCommands << ")\n"; + O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n"; + O << "('flag', " << Header.Flags << ")\n"; + + // Print extended header if 64-bit. + if (is64Bit()) + O << "('reserved', " << Header64Ext.Reserved << ")\n"; +} + +void MachOObject::print(raw_ostream &O) const { + O << "Header:\n"; + printHeader(O); + O << "Load Commands:\n"; + + O << "Buffer:\n"; +} diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp new file mode 100644 index 0000000..877cbfb --- /dev/null +++ b/lib/Object/MachOObjectFile.cpp @@ -0,0 +1,327 @@ +//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOObjectFile class, which binds the MachOObject +// class to the generic ObjectFile wrapper. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Triple.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Object/MachOObject.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MachO.h" + +#include <cctype> +#include <cstring> +#include <limits> + +using namespace llvm; +using namespace object; + +namespace llvm { + +typedef MachOObject::LoadCommandInfo LoadCommandInfo; + +class MachOObjectFile : public ObjectFile { +public: + MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO) + : ObjectFile(Object), + MachOObj(MOO), + RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {} + + virtual symbol_iterator begin_symbols() const; + virtual symbol_iterator end_symbols() const; + virtual section_iterator begin_sections() const; + virtual section_iterator end_sections() const; + + virtual uint8_t getBytesInAddress() const; + virtual StringRef getFileFormatName() const; + virtual unsigned getArch() const; + +protected: + virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; + virtual StringRef getSymbolName(DataRefImpl Symb) const; + virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; + virtual uint64_t getSymbolSize(DataRefImpl Symb) const; + virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; + virtual bool isSymbolInternal(DataRefImpl Symb) const; + + virtual SectionRef getSectionNext(DataRefImpl Sec) const; + virtual StringRef getSectionName(DataRefImpl Sec) const; + virtual uint64_t getSectionAddress(DataRefImpl Sec) const; + virtual uint64_t getSectionSize(DataRefImpl Sec) const; + virtual StringRef getSectionContents(DataRefImpl Sec) const; + virtual bool isSectionText(DataRefImpl Sec) const; + +private: + MachOObject *MachOObj; + mutable uint32_t RegisteredStringTable; + + void moveToNextSection(DataRefImpl &DRI) const; + void getSymbolTableEntry(DataRefImpl DRI, + InMemoryStruct<macho::SymbolTableEntry> &Res) const; + void moveToNextSymbol(DataRefImpl &DRI) const; + void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const; +}; + +ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { + std::string Err; + MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err); + if (!MachOObj) + return NULL; + return new MachOObjectFile(Buffer, MachOObj); +} + +/*===-- Symbols -----------------------------------------------------------===*/ + +void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const { + uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; + while (DRI.d.a < LoadCommandCount) { + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + if (LCI.Command.Type == macho::LCT_Symtab) { + InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; + MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); + if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries) + return; + } + + DRI.d.a++; + DRI.d.b = 0; + } +} + +void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI, + InMemoryStruct<macho::SymbolTableEntry> &Res) const { + InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); + + if (RegisteredStringTable != DRI.d.a) { + 
MachOObj->RegisterStringTable(*SymtabLoadCmd); + RegisteredStringTable = DRI.d.a; + } + + MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, + Res); +} + + +SymbolRef MachOObjectFile::getSymbolNext(DataRefImpl DRI) const { + DRI.d.b++; + moveToNextSymbol(DRI); + return SymbolRef(DRI, this); +} + +StringRef MachOObjectFile::getSymbolName(DataRefImpl DRI) const { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + return MachOObj->getStringAtIndex(Entry->StringIndex); +} + +uint64_t MachOObjectFile::getSymbolAddress(DataRefImpl DRI) const { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + return Entry->Value; +} + +uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const { + return UnknownAddressOrSize; +} + +char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + + char Char; + switch (Entry->Type & macho::STF_TypeMask) { + case macho::STT_Undefined: + Char = 'u'; + break; + case macho::STT_Absolute: + case macho::STT_Section: + Char = 's'; + break; + default: + Char = '?'; + break; + } + + if (Entry->Flags & (macho::STF_External | macho::STF_PrivateExtern)) + Char = toupper(Char); + return Char; +} + +bool MachOObjectFile::isSymbolInternal(DataRefImpl DRI) const { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + return Entry->Flags & macho::STF_StabsEntryMask; +} + +ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const { + // DRI.d.a = segment number; DRI.d.b = symbol index. + DataRefImpl DRI; + DRI.d.a = DRI.d.b = 0; + moveToNextSymbol(DRI); + return symbol_iterator(SymbolRef(DRI, this)); +} + +ObjectFile::symbol_iterator MachOObjectFile::end_symbols() const { + DataRefImpl DRI; + DRI.d.a = MachOObj->getHeader().NumLoadCommands; + DRI.d.b = 0; + return symbol_iterator(SymbolRef(DRI, this)); +} + + +/*===-- Sections ----------------------------------------------------------===*/ + +void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const { + uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; + while (DRI.d.a < LoadCommandCount) { + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + if (LCI.Command.Type == macho::LCT_Segment) { + InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd; + MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd); + if (DRI.d.b < SegmentLoadCmd->NumSections) + return; + } else if (LCI.Command.Type == macho::LCT_Segment64) { + InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd; + MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd); + if (DRI.d.b < Segment64LoadCmd->NumSections) + return; + } + + DRI.d.a++; + DRI.d.b = 0; + } +} + +SectionRef MachOObjectFile::getSectionNext(DataRefImpl DRI) const { + DRI.d.b++; + moveToNextSection(DRI); + return SectionRef(DRI, this); +} + +void +MachOObjectFile::getSection(DataRefImpl DRI, + InMemoryStruct<macho::Section> &Res) const { + InMemoryStruct<macho::SegmentLoadCommand> SLC; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSegmentLoadCommand(LCI, SLC); + MachOObj->ReadSection(LCI, DRI.d.b, Res); +} + +StringRef MachOObjectFile::getSectionName(DataRefImpl DRI) const { + InMemoryStruct<macho::SegmentLoadCommand> SLC; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSegmentLoadCommand(LCI, SLC); + InMemoryStruct<macho::Section> Sect; + MachOObj->ReadSection(LCI, DRI.d.b, 
Sect); + + static char Result[34]; + strcpy(Result, SLC->Name); + strcat(Result, ","); + strcat(Result, Sect->Name); + return StringRef(Result); +} + +uint64_t MachOObjectFile::getSectionAddress(DataRefImpl DRI) const { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + return Sect->Address; +} + +uint64_t MachOObjectFile::getSectionSize(DataRefImpl DRI) const { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + return Sect->Size; +} + +StringRef MachOObjectFile::getSectionContents(DataRefImpl DRI) const { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + return MachOObj->getData(Sect->Offset, Sect->Size); +} + +bool MachOObjectFile::isSectionText(DataRefImpl DRI) const { + InMemoryStruct<macho::SegmentLoadCommand> SLC; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSegmentLoadCommand(LCI, SLC); + return !strcmp(SLC->Name, "__TEXT"); +} + +ObjectFile::section_iterator MachOObjectFile::begin_sections() const { + DataRefImpl DRI; + DRI.d.a = DRI.d.b = 0; + moveToNextSection(DRI); + return section_iterator(SectionRef(DRI, this)); +} + +ObjectFile::section_iterator MachOObjectFile::end_sections() const { + DataRefImpl DRI; + DRI.d.a = MachOObj->getHeader().NumLoadCommands; + DRI.d.b = 0; + return section_iterator(SectionRef(DRI, this)); +} + +/*===-- Miscellaneous -----------------------------------------------------===*/ + +uint8_t MachOObjectFile::getBytesInAddress() const { + return MachOObj->is64Bit() ? 8 : 4; +} + +StringRef MachOObjectFile::getFileFormatName() const { + if (!MachOObj->is64Bit()) { + switch (MachOObj->getHeader().CPUType) { + case llvm::MachO::CPUTypeI386: + return "Mach-O 32-bit i386"; + case llvm::MachO::CPUTypeARM: + return "Mach-O arm"; + case llvm::MachO::CPUTypePowerPC: + return "Mach-O 32-bit ppc"; + default: + assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 && + "64-bit object file when we're not 64-bit?"); + return "Mach-O 32-bit unknown"; + } + } + + switch (MachOObj->getHeader().CPUType) { + case llvm::MachO::CPUTypeX86_64: + return "Mach-O 64-bit x86-64"; + case llvm::MachO::CPUTypePowerPC64: + return "Mach-O 64-bit ppc64"; + default: + assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 1 && + "32-bit object file when we're 64-bit?"); + return "Mach-O 64-bit unknown"; + } +} + +unsigned MachOObjectFile::getArch() const { + switch (MachOObj->getHeader().CPUType) { + case llvm::MachO::CPUTypeI386: + return Triple::x86; + case llvm::MachO::CPUTypeX86_64: + return Triple::x86_64; + case llvm::MachO::CPUTypeARM: + return Triple::arm; + case llvm::MachO::CPUTypePowerPC: + return Triple::ppc; + case llvm::MachO::CPUTypePowerPC64: + return Triple::ppc64; + default: + return Triple::UnknownArch; + } +} + +} // end namespace llvm + diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp new file mode 100644 index 0000000..603b23c --- /dev/null +++ b/lib/Object/Object.cpp @@ -0,0 +1,59 @@ +//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the C bindings to the file-format-independent object +// library. 
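The bindings defined just below can be exercised end to end with a short C driver; a sketch only, with the buffer-creation call taken from llvm-c/Core.h and lifetime/error handling pared to the minimum:

#include "llvm-c/Core.h"
#include "llvm-c/Object.h"
#include <stdio.h>

int main(int argc, char **argv) {
  LLVMMemoryBufferRef Buf;
  char *Msg = 0;
  if (argc < 2 ||
      LLVMCreateMemoryBufferWithContentsOfFile(argv[1], &Buf, &Msg))
    return 1;
  LLVMObjectFileRef Obj = LLVMCreateObjectFile(Buf);
  LLVMSectionIteratorRef It = LLVMGetSections(Obj);
  while (!LLVMIsSectionIteratorAtEnd(Obj, It)) {
    printf("%-16s %8llu bytes\n", LLVMGetSectionName(It),
           (unsigned long long)LLVMGetSectionSize(It));
    LLVMMoveToNextSection(It);
  }
  LLVMDisposeSectionIterator(It);
  LLVMDisposeObjectFile(Obj);
  return 0;
}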
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ObjectFile.h" +#include "llvm-c/Object.h" + +using namespace llvm; +using namespace object; + +LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { + return wrap(ObjectFile::createObjectFile(unwrap(MemBuf))); +} + +void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) { + delete unwrap(ObjectFile); +} + +LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) { + ObjectFile::section_iterator SI = unwrap(ObjectFile)->begin_sections(); + return wrap(new ObjectFile::section_iterator(SI)); +} + +void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile, + LLVMSectionIteratorRef SI) { + return (*unwrap(SI) == unwrap(ObjectFile)->end_sections()) ? 1 : 0; +} + +void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { + // We can't use unwrap() here because the argument to ++ must be an lvalue. + ++*reinterpret_cast<ObjectFile::section_iterator*>(SI); +} + +const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getName().data(); +} + +uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getSize(); +} + +const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getContents().data(); +} + diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 161ae3a..47b6311 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -55,7 +55,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { case sys::Mach_O_DynamicLinker_FileType: case sys::Mach_O_Bundle_FileType: case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: - return 0; + return createMachOObjectFile(Object); case sys::COFF_FileType: return createCOFFObjectFile(Object); default: diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index e765ba0..c3169ac 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -726,7 +726,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { } APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) -{ + : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); sign = 0; @@ -736,14 +736,15 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) normalize(rmNearestTiesToEven, lfExactlyZero); } -APFloat::APFloat(const fltSemantics &ourSemantics) { +APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); category = fcZero; sign = false; } -APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { +APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) + : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); // Allocates storage if necessary but does not initialize it. 
initialize(&ourSemantics); @@ -751,7 +752,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { APFloat::APFloat(const fltSemantics &ourSemantics, fltCategory ourCategory, bool negative) -{ + : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); category = ourCategory; @@ -763,14 +764,13 @@ APFloat::APFloat(const fltSemantics &ourSemantics, } APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) -{ + : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); convertFromString(text, rmNearestTiesToEven); } -APFloat::APFloat(const APFloat &rhs) -{ +APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) { initialize(rhs.semantics); assign(rhs); } @@ -3257,18 +3257,15 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { return Val; } -APFloat::APFloat(const APInt& api, bool isIEEE) -{ +APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) { initFromAPInt(api, isIEEE); } -APFloat::APFloat(float f) -{ +APFloat::APFloat(float f) : exponent2(0), sign2(0) { initFromAPInt(APInt::floatToBits(f)); } -APFloat::APFloat(double d) -{ +APFloat::APFloat(double d) : exponent2(0), sign2(0) { initFromAPInt(APInt::doubleToBits(d)); } @@ -3565,3 +3562,37 @@ void APFloat::toString(SmallVectorImpl<char> &Str, for (; I != NDigits; ++I) Str.push_back(buffer[NDigits-I-1]); } + +bool APFloat::getExactInverse(APFloat *inv) const { + // We can only guarantee the existence of an exact inverse for IEEE floats. + if (semantics != &IEEEhalf && semantics != &IEEEsingle && + semantics != &IEEEdouble && semantics != &IEEEquad) + return false; + + // Special floats and denormals have no exact inverse. + if (category != fcNormal) + return false; + + // Check that the number is a power of two by making sure that only the + // integer bit is set in the significand. + if (significandLSB() != semantics->precision - 1) + return false; + + // Get the inverse. + APFloat reciprocal(*semantics, 1ULL); + if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) + return false; + + // Avoid multiplication with a denormal, it is not safe on all platforms and + // may be slower than a normal division. + if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision) + return false; + + assert(reciprocal.category == fcNormal && + reciprocal.significandLSB() == reciprocal.semantics->precision - 1); + + if (inv) + *inv = reciprocal; + + return true; +} diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 08f36d2..23a22ac 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1517,13 +1517,15 @@ APInt::ms APInt::magic() const { /// division by a constant as a sequence of multiplies, adds and shifts. /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry /// S. Warren, Jr., chapter 10. -APInt::mu APInt::magicu() const { +/// LeadingZeros can be used to simplify the calculation if the upper bits +/// of the divided value are known zero. 
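In use, the (m, a, s) triple computed below turns division by a constant into a multiply-high plus shifts, per the cited Hacker's Delight recipe; a 32-bit sketch of the consuming side (the function name is invented for illustration):

static uint32_t udivByConstant(uint32_t n, uint32_t m, bool a, unsigned s) {
  uint32_t q = (uint32_t)(((uint64_t)n * m) >> 32); // MULHU(n, magic)
  if (!a)
    return q >> s;
  uint32_t t = ((n - q) >> 1) + q; // overflow-free intermediate step
  return t >> (s - 1);
}

For a divisor of 7 the triple is m = 0x24924925, a = 1, s = 3, and udivByConstant(21, 0x24924925, true, 3) == 3. Passing a nonzero LeadingZeros, as the new parameter allows, can yield a cheaper magic constant when the dividend's top bits are known to be zero.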
+APInt::mu APInt::magicu(unsigned LeadingZeros) const { const APInt& d = *this; unsigned p; APInt nc, delta, q1, r1, q2, r2; struct mu magu; magu.a = 0; // initialize "add" indicator - APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()); + APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); @@ -2076,6 +2078,16 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { return Res; } +APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this * RHS; + + if (*this != 0 && RHS != 0) + Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS; + else + Overflow = false; + return Res; +} + APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const { Overflow = ShAmt >= getBitWidth(); if (Overflow) diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 5e27df6..215b0f2 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -136,6 +136,14 @@ unsigned BumpPtrAllocator::GetNumSlabs() const { return NumSlabs; } +size_t BumpPtrAllocator::getTotalMemory() const { + size_t TotalMemory = 0; + for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { + TotalMemory += Slab->Size; + } + return TotalMemory; +} + void BumpPtrAllocator::PrintStats() const { unsigned NumSlabs = 0; size_t TotalMemory = 0; diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 7e74499..7f1c0d3 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -186,12 +186,14 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value, /// have already been stripped. static Option *LookupNearestOption(StringRef Arg, const StringMap<Option*> &OptionsMap, - const char *&NearestString) { + std::string &NearestString) { // Reject all dashes. if (Arg.empty()) return 0; // Split on any equal sign. - StringRef LHS = Arg.split('=').first; + std::pair<StringRef, StringRef> SplitArg = Arg.split('='); + StringRef &LHS = SplitArg.first; // LHS == Arg when no '=' is present. + StringRef &RHS = SplitArg.second; // Find the closest match. Option *Best = 0; @@ -204,14 +206,19 @@ static Option *LookupNearestOption(StringRef Arg, if (O->ArgStr[0]) OptionNames.push_back(O->ArgStr); + bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed; + StringRef Flag = PermitValue ? LHS : Arg; for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { StringRef Name = OptionNames[i]; unsigned Distance = StringRef(Name).edit_distance( - Arg, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance); + Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance); if (!Best || Distance < BestDistance) { Best = O; - NearestString = OptionNames[i]; BestDistance = Distance; + if (RHS.empty() || !PermitValue) + NearestString = OptionNames[i]; + else + NearestString = std::string(OptionNames[i]) + "=" + RHS.str(); } } } @@ -611,7 +618,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, for (int i = 1; i < argc; ++i) { Option *Handler = 0; Option *NearestHandler = 0; - const char *NearestHandlerString = 0; + std::string NearestHandlerString; StringRef Value; StringRef ArgName = ""; @@ -908,8 +915,6 @@ void alias::printOptionInfo(size_t GlobalWidth) const { errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; } - - //===----------------------------------------------------------------------===// // Parser Implementation code... 
// @@ -939,7 +944,11 @@ void basic_parser_impl::printOptionInfo(const Option &O, outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n'; } - +void basic_parser_impl::printOptionName(const Option &O, + size_t GlobalWidth) const { + outs() << " -" << O.ArgStr; + outs().indent(GlobalWidth-std::strlen(O.ArgStr)); +} // parser<bool> implementation @@ -1083,6 +1092,89 @@ void generic_parser_base::printOptionInfo(const Option &O, } } +static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff + +// printGenericOptionDiff - Print the value of this option and its default. +// +// "Generic" options have each value mapped to a name. +void generic_parser_base:: +printGenericOptionDiff(const Option &O, const GenericOptionValue &Value, + const GenericOptionValue &Default, + size_t GlobalWidth) const { + outs() << " -" << O.ArgStr; + outs().indent(GlobalWidth-std::strlen(O.ArgStr)); + + unsigned NumOpts = getNumOptions(); + for (unsigned i = 0; i != NumOpts; ++i) { + if (Value.compare(getOptionValue(i))) + continue; + + outs() << "= " << getOption(i); + size_t L = std::strlen(getOption(i)); + size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0; + outs().indent(NumSpaces) << " (default: "; + for (unsigned j = 0; j != NumOpts; ++j) { + if (Default.compare(getOptionValue(j))) + continue; + outs() << getOption(j); + break; + } + outs() << ")\n"; + return; + } + outs() << "= *unknown option value*\n"; +} + +// printOptionDiff - Specializations for printing basic value types. +// +#define PRINT_OPT_DIFF(T) \ + void parser<T>:: \ + printOptionDiff(const Option &O, T V, OptionValue<T> D, \ + size_t GlobalWidth) const { \ + printOptionName(O, GlobalWidth); \ + std::string Str; \ + { \ + raw_string_ostream SS(Str); \ + SS << V; \ + } \ + outs() << "= " << Str; \ + size_t NumSpaces = MaxOptWidth > Str.size() ? MaxOptWidth - Str.size() : 0;\ + outs().indent(NumSpaces) << " (default: "; \ + if (D.hasValue()) \ + outs() << D.getValue(); \ + else \ + outs() << "*no default*"; \ + outs() << ")\n"; \ + } \ + +PRINT_OPT_DIFF(bool) +PRINT_OPT_DIFF(boolOrDefault) +PRINT_OPT_DIFF(int) +PRINT_OPT_DIFF(unsigned) +PRINT_OPT_DIFF(double) +PRINT_OPT_DIFF(float) +PRINT_OPT_DIFF(char) + +void parser<std::string>:: +printOptionDiff(const Option &O, StringRef V, OptionValue<std::string> D, + size_t GlobalWidth) const { + printOptionName(O, GlobalWidth); + outs() << "= " << V; + size_t NumSpaces = MaxOptWidth > V.size() ? MaxOptWidth - V.size() : 0; + outs().indent(NumSpaces) << " (default: "; + if (D.hasValue()) + outs() << D.getValue(); + else + outs() << "*no default*"; + outs() << ")\n"; +} + +// Print a placeholder for options that don't yet support printOptionDiff(). +void basic_parser_impl:: +printOptionNoValue(const Option &O, size_t GlobalWidth) const { + printOptionName(O, GlobalWidth); + outs() << "= *cannot print option value*\n"; +} //===----------------------------------------------------------------------===// // -help and -help-hidden option implementation @@ -1094,6 +1186,35 @@ static int OptNameCompare(const void *LHS, const void *RHS) { return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first); } +// Copy Options into a vector so we can sort them as we like. +static void +sortOpts(StringMap<Option*> &OptMap, + SmallVectorImpl< std::pair<const char *, Option*> > &Opts, + bool ShowHidden) { + SmallPtrSet<Option*, 128> OptionSet; // Duplicate option detection.
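Since the escaped-newline form is hard to read, this is what the PRINT_OPT_DIFF macro earlier in this hunk generates for T = bool (whitespace reflowed, nothing else changed):

void parser<bool>::printOptionDiff(const Option &O, bool V,
                                   OptionValue<bool> D,
                                   size_t GlobalWidth) const {
  printOptionName(O, GlobalWidth);
  std::string Str;
  {
    raw_string_ostream SS(Str);
    SS << V;
  }
  outs() << "= " << Str;
  size_t NumSpaces = MaxOptWidth > Str.size() ? MaxOptWidth - Str.size() : 0;
  outs().indent(NumSpaces) << " (default: ";
  if (D.hasValue())
    outs() << D.getValue();
  else
    outs() << "*no default*";
  outs() << ")\n";
}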
+ + for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) { + // Ignore really-hidden options. + if (I->second->getOptionHiddenFlag() == ReallyHidden) + continue; + + // Unless showhidden is set, ignore hidden flags. + if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) + continue; + + // If we've already seen this option, don't add it to the list again. + if (!OptionSet.insert(I->second)) + continue; + + Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(), + I->second)); + } + + // Sort the options list alphabetically. + qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); +} + namespace { class HelpPrinter { @@ -1115,30 +1236,8 @@ public: StringMap<Option*> OptMap; GetOptionInfo(PositionalOpts, SinkOpts, OptMap); - // Copy Options into a vector so we can sort them as we like. SmallVector<std::pair<const char *, Option*>, 128> Opts; - SmallPtrSet<Option*, 128> OptionSet; // Duplicate option detection. - - for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end(); - I != E; ++I) { - // Ignore really-hidden options. - if (I->second->getOptionHiddenFlag() == ReallyHidden) - continue; - - // Unless showhidden is set, ignore hidden flags. - if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) - continue; - - // If we've already seen this option, don't add it to the list again. - if (!OptionSet.insert(I->second)) - continue; - - Opts.push_back(std::pair<const char *, Option*>(I->getKey().data(), - I->second)); - } - - // Sort the options list alphabetically. - qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); + sortOpts(OptMap, Opts, ShowHidden); if (ProgramOverview) outs() << "OVERVIEW: " << ProgramOverview << "\n"; @@ -1197,6 +1296,38 @@ static cl::opt<HelpPrinter, true, parser<bool> > HHOp("help-hidden", cl::desc("Display all available options"), cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed); +static cl::opt<bool> +PrintOptions("print-options", + cl::desc("Print non-default options after command line parsing"), + cl::Hidden, cl::init(false)); + +static cl::opt<bool> +PrintAllOptions("print-all-options", + cl::desc("Print all option values after command line parsing"), + cl::Hidden, cl::init(false)); + +// Print the value of each option. +void cl::PrintOptionValues() { + if (!PrintOptions && !PrintAllOptions) return; + + // Get all the options. + SmallVector<Option*, 4> PositionalOpts; + SmallVector<Option*, 4> SinkOpts; + StringMap<Option*> OptMap; + GetOptionInfo(PositionalOpts, SinkOpts, OptMap); + + SmallVector<std::pair<const char *, Option*>, 128> Opts; + sortOpts(OptMap, Opts, /*ShowHidden*/true); + + // Compute the maximum argument length... 
+ size_t MaxArgLen = 0; + for (size_t i = 0, e = Opts.size(); i != e; ++i) + MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); + + for (size_t i = 0, e = Opts.size(); i != e; ++i) + Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions); +} + static void (*OverrideVersionPrinter)() = 0; static int TargetArraySortFn(const void *LHS, const void *RHS) { diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index bf8ca3f..899c389 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -57,12 +57,36 @@ public: static sys::Mutex gCrashRecoveryContexMutex; static bool gCrashRecoveryEnabled = false; +static sys::ThreadLocal<const CrashRecoveryContextCleanup> + tlIsRecoveringFromCrash; + +CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} + CrashRecoveryContext::~CrashRecoveryContext() { + // Reclaim registered resources. + CrashRecoveryContextCleanup *i = head; + tlIsRecoveringFromCrash.set(head); + while (i) { + CrashRecoveryContextCleanup *tmp = i; + i = tmp->next; + tmp->cleanupFired = true; + tmp->recoverResources(); + delete tmp; + } + tlIsRecoveringFromCrash.erase(); + CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; delete CRCI; } +bool CrashRecoveryContext::isRecoveringFromCrash() { + return tlIsRecoveringFromCrash.get() != 0; +} + CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { + if (!gCrashRecoveryEnabled) + return 0; + const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); if (!CRCI) return 0; @@ -70,6 +94,33 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { return CRCI->CRC; } +void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup) +{ + if (!cleanup) + return; + if (head) + head->prev = cleanup; + cleanup->next = head; + head = cleanup; +} + +void +CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { + if (!cleanup) + return; + if (cleanup == head) { + head = cleanup->next; + if (head) + head->prev = 0; + } + else { + cleanup->prev->next = cleanup->next; + if (cleanup->next) + cleanup->next->prev = cleanup->prev; + } + delete cleanup; +} + #ifdef LLVM_ON_WIN32 // FIXME: No real Win32 implementation currently. 
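A sketch of how a client might hook a heap allocation into the new cleanup list. The base-class constructor and fields are not shown in this patch, so the subclass here is a hedged assumption; registerCleanup, unregisterCleanup, and the destructor walk are from the code above:

struct BufferCleanup : CrashRecoveryContextCleanup {
  std::vector<char> *Buf;
  explicit BufferCleanup(std::vector<char> *B) : Buf(B) {}
  virtual void recoverResources() { delete Buf; } // fired by the context
};

void parseWithRecovery(CrashRecoveryContext &CRC) {
  std::vector<char> *Buf = new std::vector<char>(1 << 20);
  CrashRecoveryContextCleanup *C = new BufferCleanup(Buf);
  CRC.registerCleanup(C);
  // ... work that may crash ...
  CRC.unregisterCleanup(C); // normal path: unlinks and deletes the node
}

Any cleanup still registered when the context is destroyed has its recoverResources() replayed, and during that walk other teardown code can consult CrashRecoveryContext::isRecoveringFromCrash() to skip nonessential work.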
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index 9799ef5..74a9fda 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -203,6 +203,10 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class"; case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr"; + case DW_AT_APPLE_property_name: return "DW_AT_APPLE_property_name"; + case DW_AT_APPLE_property_getter: return "DW_AT_APPLE_property_getter"; + case DW_AT_APPLE_property_setter: return "DW_AT_APPLE_property_setter"; + case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute"; } return 0; } @@ -391,6 +395,7 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { case DW_OP_call_ref: return "DW_OP_call_ref"; case DW_OP_form_tls_address: return "DW_OP_form_tls_address"; case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa"; + case DW_OP_bit_piece: return "DW_OP_bit_piece"; case DW_OP_lo_user: return "DW_OP_lo_user"; case DW_OP_hi_user: return "DW_OP_hi_user"; } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 3579546..e6cc57d 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -32,7 +32,6 @@ #endif using namespace llvm; -using namespace std; static fatal_error_handler_t ErrorHandler = 0; static void *ErrorHandlerUserData = 0; diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index 5dbabee..4c8c0c6 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -198,7 +198,7 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA, return 1; } - // Now its safe to mmap the files into memory becasue both files + // Now its safe to mmap the files into memory because both files // have a non-zero size. error_code ec; OwningPtr<MemoryBuffer> F1; diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index a4f80a9..d2e35b8 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -147,6 +147,11 @@ void FoldingSetNodeID::AddString(StringRef String) { Bits.push_back(V); } +// AddNodeID - Adds the Bit data of another ID to *this. +void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { + Bits.append(ID.Bits.begin(), ID.Bits.end()); +} + /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to /// lookup the node in the FoldingSetImpl. unsigned FoldingSetNodeID::ComputeHash() const { diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 4dacf96..911c64a 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -214,6 +214,8 @@ std::string sys::getHostCPUName() { // As found in a Summer 2010 model iMac. case 37: // Intel Core i7, laptop version. return "corei7"; + case 42: // SandyBridge + return "sandybridge"; case 28: // Intel Atom processor. All processors are manufactured using // the 45 nm process diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index a0c650d..e2b5b7a 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -46,8 +46,10 @@ MemoryBuffer::~MemoryBuffer() { } /// init - Initialize this MemoryBuffer as a reference to externally allocated /// memory, memory that we know is already null terminated. 
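The hunk below makes that guarantee conditional: a buffer may now wrap memory with no trailing NUL as long as the creator says so. The practical win is slicing an existing buffer without copying; a sketch with hypothetical names and offsets, using the new parameter threaded through this file:

StringRef Data = Outer->getBuffer(); // some existing MemoryBuffer
MemoryBuffer *Chunk =
    MemoryBuffer::getMemBuffer(Data.substr(Off, Len), "chunk",
                               /*RequiresNullTerminator=*/false);

Only when the flag is true does the assert below still insist on BufEnd[0] == 0.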
-void MemoryBuffer::init(const char *BufStart, const char *BufEnd) { - assert(BufEnd[0] == 0 && "Buffer is not null terminated!"); +void MemoryBuffer::init(const char *BufStart, const char *BufEnd, + bool RequiresNullTerminator) { + assert((!RequiresNullTerminator || BufEnd[0] == 0) && + "Buffer is not null terminated!"); BufferStart = BufStart; BufferEnd = BufEnd; } @@ -65,32 +67,39 @@ static void CopyStringRef(char *Memory, StringRef Data) { /// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it. template <typename T> -static T* GetNamedBuffer(StringRef Buffer, StringRef Name) { +static T* GetNamedBuffer(StringRef Buffer, StringRef Name, + bool RequiresNullTerminator) { char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1)); CopyStringRef(Mem + sizeof(T), Name); - return new (Mem) T(Buffer); + return new (Mem) T(Buffer, RequiresNullTerminator); } namespace { /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. class MemoryBufferMem : public MemoryBuffer { public: - MemoryBufferMem(StringRef InputData) { - init(InputData.begin(), InputData.end()); + MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) { + init(InputData.begin(), InputData.end(), RequiresNullTerminator); } virtual const char *getBufferIdentifier() const { // The name is stored after the class itself. return reinterpret_cast<const char*>(this + 1); } + + virtual BufferKind getBufferKind() const { + return MemoryBuffer_Malloc; + } }; } /// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note /// that EndPtr[0] must be a null byte and be accessible! MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, - StringRef BufferName) { - return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName); + StringRef BufferName, + bool RequiresNullTerminator) { + return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName, + RequiresNullTerminator); } /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, @@ -127,7 +136,7 @@ MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size, char *Buf = Mem + AlignedStringLen; Buf[Size] = 0; // Null terminate buffer. - return new (Mem) MemoryBufferMem(StringRef(Buf, Size)); + return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true); } /// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that @@ -172,26 +181,41 @@ namespace { /// sys::Path::UnMapFilePages method. class MemoryBufferMMapFile : public MemoryBufferMem { public: - MemoryBufferMMapFile(StringRef Buffer) - : MemoryBufferMem(Buffer) { } + MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator) + : MemoryBufferMem(Buffer, RequiresNullTerminator) { } ~MemoryBufferMMapFile() { - sys::Path::UnMapFilePages(getBufferStart(), getBufferSize()); + static int PageSize = sys::Process::GetPageSize(); + + uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart()); + size_t Size = getBufferSize(); + uintptr_t RealStart = Start & ~(PageSize - 1); + size_t RealSize = Size + (Start - RealStart); + + sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart), + RealSize); + } + + virtual BufferKind getBufferKind() const { + return MemoryBuffer_MMap; } }; } error_code MemoryBuffer::getFile(StringRef Filename, OwningPtr<MemoryBuffer> &result, - int64_t FileSize) { + int64_t FileSize, + bool RequiresNullTerminator) { // Ensure the path is null terminated. 
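The MemoryBufferMMapFile destructor above has to undo the page rounding performed when the file was mapped, since the buffer handed to clients may start mid-page. With a 4 KiB page and hypothetical addresses:

uintptr_t Start     = 0x2a42c8;                   // user-visible buffer start
uintptr_t RealStart = Start & ~uintptr_t(0xfff);  // 0x2a4000, actual map base
size_t    RealSize  = Size + (Start - RealStart); // buffer size + 0x2c8 slack

Unmapping [RealStart, RealStart + RealSize) then releases exactly the pages that the offset-aware MapInFilePages call further down acquired.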
SmallString<256> PathBuf(Filename.begin(), Filename.end()); - return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize); + return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize, + RequiresNullTerminator); } error_code MemoryBuffer::getFile(const char *Filename, OwningPtr<MemoryBuffer> &result, - int64_t FileSize) { + int64_t FileSize, + bool RequiresNullTerminator) { int OpenFlags = O_RDONLY; #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. @@ -200,17 +224,32 @@ error_code MemoryBuffer::getFile(const char *Filename, if (FD == -1) { return error_code(errno, posix_category()); } - error_code ret = getOpenFile(FD, Filename, result, FileSize); + error_code ret = getOpenFile(FD, Filename, result, FileSize, FileSize, + 0, RequiresNullTerminator); close(FD); return ret; } -error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, - OwningPtr<MemoryBuffer> &result, - int64_t FileSize) { +static bool shouldUseMmap(int FD, + size_t FileSize, + size_t MapSize, + off_t Offset, + bool RequiresNullTerminator, + int PageSize) { + // We don't use mmap for small files because this can severely fragment our + // address space. + if (MapSize < 4096*4) + return false; + + if (!RequiresNullTerminator) + return true; + + // If we don't know the file size, use fstat to find out. fstat on an open // file descriptor is cheaper than stat on a random path. - if (FileSize == -1) { + // FIXME: this chunk of code is duplicated, but it avoids a fstat when + // RequiresNullTerminator = false and MapSize != -1. + if (FileSize == size_t(-1)) { struct stat FileInfo; // TODO: This should use fstat64 when available. if (fstat(FD, &FileInfo) == -1) { @@ -219,23 +258,59 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, FileSize = FileInfo.st_size; } + // If we need a null terminator and the end of the map is inside the file, + // we cannot use mmap. + size_t End = Offset + MapSize; + assert(End <= FileSize); + if (End != FileSize) + return false; - // If the file is large, try to use mmap to read it in. We don't use mmap - // for small files, because this can severely fragment our address space. Also - // don't try to map files that are exactly a multiple of the system page size, - // as the file would not have the required null terminator. - // - // FIXME: Can we just mmap an extra page in the latter case? - if (FileSize >= 4096*4 && - (FileSize & (sys::Process::GetPageSize()-1)) != 0) { - if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) { + // Don't try to map files that are exactly a multiple of the system page size + // if we need a null terminator. + if ((FileSize & (PageSize -1)) == 0) + return false; + + return true; +} + +error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, + OwningPtr<MemoryBuffer> &result, + size_t FileSize, size_t MapSize, + off_t Offset, + bool RequiresNullTerminator) { + static int PageSize = sys::Process::GetPageSize(); + + // Default is to map the full file. + if (MapSize == size_t(-1)) { + // If we don't know the file size, use fstat to find out. fstat on an open + // file descriptor is cheaper than stat on a random path. + if (FileSize == size_t(-1)) { + struct stat FileInfo; + // TODO: This should use fstat64 when available. 
+ if (fstat(FD, &FileInfo) == -1) { + return error_code(errno, posix_category()); + } + FileSize = FileInfo.st_size; + } + MapSize = FileSize; + } + + if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, + PageSize)) { + off_t RealMapOffset = Offset & ~(PageSize - 1); + off_t Delta = Offset - RealMapOffset; + size_t RealMapSize = MapSize + Delta; + + if (const char *Pages = sys::Path::MapInFilePages(FD, + RealMapSize, + RealMapOffset)) { result.reset(GetNamedBuffer<MemoryBufferMMapFile>( - StringRef(Pages, FileSize), Filename)); + StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator)); return success; } } - MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(FileSize, Filename); + MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename); if (!Buf) { // Failed to create a buffer. The only way it can fail is if // new(std::nothrow) returns 0. @@ -245,7 +320,10 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, OwningPtr<MemoryBuffer> SB(Buf); char *BufPtr = const_cast<char*>(SB->getBufferStart()); - size_t BytesLeft = FileSize; + size_t BytesLeft = MapSize; + if (lseek(FD, Offset, SEEK_SET) == -1) + return error_code(errno, posix_category()); + while (BytesLeft) { ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); if (NumRead == -1) { diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index e5e875b..8fbaf2d 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -15,11 +15,15 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Config/config.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Endian.h" #include <cassert> #include <cstring> #include <ostream> using namespace llvm; using namespace sys; +namespace { +using support::ulittle32_t; +} //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system @@ -88,15 +92,21 @@ sys::IdentifyFileType(const char *magic, unsigned length) { } break; + // The two magic numbers for mach-o are: + // 0xfeedface - 32-bit mach-o + // 0xfeedfacf - 64-bit mach-o case 0xFE: - case 0xCE: { + case 0xCE: + case 0xCF: { uint16_t type = 0; if (magic[0] == char(0xFE) && magic[1] == char(0xED) && - magic[2] == char(0xFA) && magic[3] == char(0xCE)) { + magic[2] == char(0xFA) && + (magic[3] == char(0xCE) || magic[3] == char(0xCF))) { /* Native endian */ if (length >= 16) type = magic[14] << 8 | magic[15]; - } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) && - magic[2] == char(0xED) && magic[3] == char(0xFE)) { + } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) && + magic[1] == char(0xFA) && magic[2] == char(0xED) && + magic[3] == char(0xFE)) { /* Reverse endian */ if (length >= 14) type = magic[13] << 8 | magic[12]; } @@ -129,6 +139,16 @@ sys::IdentifyFileType(const char *magic, unsigned length) { if (magic[1] == 0x02) return COFF_FileType; break; + + case 0x4d: // Possible MS-DOS stub on Windows PE file + if (magic[1] == 0x5a) { + uint32_t off = *reinterpret_cast<const ulittle32_t *>(magic + 0x3c); + // PE/COFF file, either EXE or DLL. + if (off < length && memcmp(magic + off, "PE\0\0",4) == 0) + return COFF_FileType; + } + break; + case 0x64: // x86-64 Windows. 
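Two recognizers change in the Path.cpp hunk above: Mach-O gains the 64-bit magic 0xfeedfacf (plus its byte-swapped form), and PE/COFF files are now detected through the MS-DOS stub, whose little-endian 32-bit field at offset 0x3c points at the "PE\0\0" signature. A compact stand-alone sniffing sketch under those assumptions:

    #include <cstdint>
    #include <cstring>

    enum SniffedType { Unknown, MachO32, MachO64, PECOFF };

    // 'buf' should hold at least 0x40 bytes for the PE stub check.
    static SniffedType sniff(const unsigned char *buf, size_t len) {
      if (len >= 4) {
        uint32_t be = (uint32_t)buf[0] << 24 | (uint32_t)buf[1] << 16 |
                      (uint32_t)buf[2] << 8 | buf[3];
        if (be == 0xfeedfaceU || be == 0xcefaedfeU) return MachO32; // 32-bit
        if (be == 0xfeedfacfU || be == 0xcffaedfeU) return MachO64; // 64-bit
      }
      if (len >= 0x40 && buf[0] == 'M' && buf[1] == 'Z') {
        // e_lfanew: little-endian offset of the "PE\0\0" signature.
        uint32_t off = (uint32_t)buf[0x3c] | (uint32_t)buf[0x3d] << 8 |
                       (uint32_t)buf[0x3e] << 16 | (uint32_t)buf[0x3f] << 24;
        if (off <= len - 4 && memcmp(buf + off, "PE\0\0", 4) == 0)
          return PECOFF;
      }
      return Unknown;
    }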
if (magic[1] == char(0x86)) return COFF_FileType; diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index a9f4709..082b701 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines some helpful functions for dealing with the possibility of -// Unix signals occuring while your program is running. +// Unix signals occurring while your program is running. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 309ffb0..d293da0 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -82,7 +82,7 @@ bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){ Matches->push_back(StringRef()); continue; } - assert(pm[i].rm_eo > pm[i].rm_so); + assert(pm[i].rm_eo >= pm[i].rm_so); Matches->push_back(StringRef(String.data()+pm[i].rm_so, pm[i].rm_eo-pm[i].rm_so)); } diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp index a3af37d..a117893 100644 --- a/lib/Support/Signals.cpp +++ b/lib/Support/Signals.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines some helpful functions for dealing with the possibility of -// Unix signals occuring while your program is running. +// Unix signals occurring while your program is running. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 504e649..997ce0b 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -52,10 +52,14 @@ bool SmallPtrSetImpl::insert_imp(const void * Ptr) { // Otherwise, hit the big set case, which will call grow. } - // If more than 3/4 of the array is full, grow. - if (NumElements*4 >= CurArraySize*3 || - CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) - Grow(); + if (NumElements*4 >= CurArraySize*3) { + // If more than 3/4 of the array is full, grow. + Grow(CurArraySize < 64 ? 128 : CurArraySize*2); + } else if (CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) { + // If fewer of 1/8 of the array is empty (meaning that many are filled with + // tombstones), rehash. + Grow(CurArraySize); + } // Okay, we know we have space. Find a hash bucket. const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr)); @@ -125,10 +129,9 @@ const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const { /// Grow - Allocate a larger backing store for the buckets and move it over. /// -void SmallPtrSetImpl::Grow() { +void SmallPtrSetImpl::Grow(unsigned NewSize) { // Allocate at twice as many buckets, but at least 128. unsigned OldSize = CurArraySize; - unsigned NewSize = OldSize < 64 ? 
128 : OldSize*2; const void **OldBuckets = CurArray; bool WasSmall = isSmall(); diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index f0ed626..1e733d9 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -101,6 +101,10 @@ void llvm::EnableStatistics() { Enabled.setValue(true); } +bool llvm::AreStatisticsEnabled() { + return Enabled; +} + void llvm::PrintStatistics(raw_ostream &OS) { StatisticInfo &Stats = *StatInfo; diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 90ec299..a1ac512 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -169,6 +169,8 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { TheTable[Bucket].Item = getTombstoneVal(); --NumItems; ++NumTombstones; + assert(NumItems + NumTombstones <= NumBuckets); + return Result; } @@ -177,7 +179,19 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { /// RehashTable - Grow the table, redistributing values into the buckets with /// the appropriate mod-of-hashtable-size. void StringMapImpl::RehashTable() { - unsigned NewSize = NumBuckets*2; + unsigned NewSize; + + // If the hash table is now more than 3/4 full, or if fewer than 1/8 of + // the buckets are empty (meaning that many are filled with tombstones), + // grow/rehash the table. + if (NumItems*4 > NumBuckets*3) { + NewSize = NumBuckets*2; + } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) { + NewSize = NumBuckets; + } else { + return; + } + // Allocate one extra bucket which will always be non-empty. This allows the // iterators to stop at end. ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket)); @@ -212,4 +226,5 @@ void StringMapImpl::RehashTable() { TheTable = NewTableArray; NumBuckets = NewSize; + NumTombstones = 0; } diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 5398051..8c3fc09 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -131,7 +131,7 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, /// find - Search for the first string \arg Str in the string. /// -/// \return - The index of the first occurence of \arg Str, or npos if not +/// \return - The index of the first occurrence of \arg Str, or npos if not /// found. size_t StringRef::find(StringRef Str, size_t From) const { size_t N = Str.size(); @@ -145,7 +145,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { /// rfind - Search for the last string \arg Str in the string. /// -/// \return - The index of the last occurence of \arg Str, or npos if not +/// \return - The index of the last occurrence of \arg Str, or npos if not /// found. 
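The SmallPtrSet and StringMap hunks above adopt the same open-addressing maintenance policy: grow when more than 3/4 of the buckets hold live items, but if the table merely looks full because fewer than 1/8 of the buckets are genuinely empty (the rest being tombstones), rehash at the current size to reclaim them; StringMap now also resets NumTombstones afterwards. A sketch of the shared decision (the two containers differ in details, e.g. SmallPtrSet grows to at least 128):

    // Returns 0 when no rehash is needed, otherwise the new table size.
    static unsigned nextTableSize(unsigned NumBuckets, unsigned NumItems,
                                  unsigned NumTombstones) {
      if (NumItems * 4 > NumBuckets * 3)
        return NumBuckets * 2;   // more than 3/4 genuinely full: grow
      if (NumBuckets - (NumItems + NumTombstones) < NumBuckets / 8)
        return NumBuckets;       // tombstone-clogged: rehash in place
      return 0;                  // healthy: nothing to do
    }

For example, with 512 buckets, 100 live items and 360 tombstones, only 52 buckets are empty (under the 64-bucket threshold), so the table rehashes at size 512 and the tombstones disappear.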
size_t StringRef::rfind(StringRef Str) const { size_t N = Str.size(); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 36edf6e..dbdb303 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -41,7 +41,8 @@ const char *Triple::getArchTypeName(ArchType Kind) { case x86_64: return "x86_64"; case xcore: return "xcore"; case mblaze: return "mblaze"; - case ptx: return "ptx"; + case ptx32: return "ptx32"; + case ptx64: return "ptx64"; } return "<invalid>"; @@ -74,7 +75,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case xcore: return "xcore"; - case ptx: return "ptx"; + case ptx32: return "ptx"; + case ptx64: return "ptx"; } } @@ -84,6 +86,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case Apple: return "apple"; case PC: return "pc"; + case SCEI: return "scei"; } return "<invalid>"; @@ -98,8 +101,10 @@ const char *Triple::getOSTypeName(OSType Kind) { case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; + case IOS: return "ios"; case Linux: return "linux"; case Lv2: return "lv2"; + case MacOSX: return "macosx"; case MinGW32: return "mingw32"; case NetBSD: return "netbsd"; case OpenBSD: return "openbsd"; @@ -162,8 +167,10 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return x86_64; if (Name == "xcore") return xcore; - if (Name == "ptx") - return ptx; + if (Name == "ptx32") + return ptx32; + if (Name == "ptx64") + return ptx64; return UnknownArch; } @@ -202,15 +209,17 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) { Str == "armv6" || Str == "armv7") return Triple::arm; - if (Str == "ptx") - return Triple::ptx; + if (Str == "ptx32") + return Triple::ptx32; + if (Str == "ptx64") + return Triple::ptx64; return Triple::UnknownArch; } // Returns architecture name that is understood by the target assembler. 
const char *Triple::getArchNameForAssembler() { - if (getOS() != Triple::Darwin && getVendor() != Triple::Apple) + if (!isOSDarwin() && getVendor() != Triple::Apple) return NULL; StringRef Str = getArchName(); @@ -235,8 +244,10 @@ const char *Triple::getArchNameForAssembler() { return "armv6"; if (Str == "armv7" || Str == "thumbv7") return "armv7"; - if (Str == "ptx") - return "ptx"; + if (Str == "ptx32") + return "ptx32"; + if (Str == "ptx64") + return "ptx64"; return NULL; } @@ -285,8 +296,10 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { return tce; else if (ArchName == "xcore") return xcore; - else if (ArchName == "ptx") - return ptx; + else if (ArchName == "ptx32") + return ptx32; + else if (ArchName == "ptx64") + return ptx64; else return UnknownArch; } @@ -296,6 +309,8 @@ Triple::VendorType Triple::ParseVendor(StringRef VendorName) { return Apple; else if (VendorName == "pc") return PC; + else if (VendorName == "scei") + return SCEI; else return UnknownVendor; } @@ -311,10 +326,14 @@ Triple::OSType Triple::ParseOS(StringRef OSName) { return DragonFly; else if (OSName.startswith("freebsd")) return FreeBSD; + else if (OSName.startswith("ios")) + return IOS; else if (OSName.startswith("linux")) return Linux; else if (OSName.startswith("lv2")) return Lv2; + else if (OSName.startswith("macosx")) + return MacOSX; else if (OSName.startswith("mingw32")) return MinGW32; else if (OSName.startswith("netbsd")) @@ -523,67 +542,44 @@ StringRef Triple::getOSAndEnvironmentName() const { static unsigned EatNumber(StringRef &Str) { assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number"); - unsigned Result = Str[0]-'0'; + unsigned Result = 0; - // Eat the digit. - Str = Str.substr(1); - - // Handle "darwin11". - if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') { + do { + // Consume the leading digit. Result = Result*10 + (Str[0] - '0'); + // Eat the digit. Str = Str.substr(1); - } + } while (!Str.empty() && Str[0] >= '0' && Str[0] <= '9'); return Result; } -/// getDarwinNumber - Parse the 'darwin number' out of the specific target -/// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is -/// not defined, return 0's. This requires that the triple have an OSType of -/// darwin before it is called. -void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min, - unsigned &Revision) const { - assert(getOS() == Darwin && "Not a darwin target triple!"); +void Triple::getOSVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { StringRef OSName = getOSName(); - assert(OSName.startswith("darwin") && "Unknown darwin target triple!"); - - // Strip off "darwin". - OSName = OSName.substr(6); - - Maj = Min = Revision = 0; - - if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') - return; - // The major version is the first digit. - Maj = EatNumber(OSName); - if (OSName.empty()) return; + // Assume that the OS portion of the triple starts with the canonical name. + StringRef OSTypeName = getOSTypeName(getOS()); + if (OSName.startswith(OSTypeName)) + OSName = OSName.substr(OSTypeName.size()); - // Handle minor version: 10.4.9 -> darwin8.9. - if (OSName[0] != '.') - return; + // Any unset version defaults to 0. + Major = Minor = Micro = 0; - // Eat the '.'. - OSName = OSName.substr(1); - - if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') - return; - - Min = EatNumber(OSName); - if (OSName.empty()) return; - - // Handle revision darwin8.9.1 - if (OSName[0] != '.') - return; - - // Eat the '.'. 
- OSName = OSName.substr(1); + // Parse up to three components. + unsigned *Components[3] = { &Major, &Minor, &Micro }; + for (unsigned i = 0; i != 3; ++i) { + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + break; - if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') - return; + // Consume the leading number. + *Components[i] = EatNumber(OSName); - Revision = EatNumber(OSName); + // Consume the separator, if present. + if (OSName.startswith(".")) + OSName = OSName.substr(1); + } } void Triple::setTriple(const Twine &Str) { diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index ed74b67..8cbec8c 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -87,10 +87,7 @@ std::string sys::getHostTriple() { std::string::size_type DarwinDashIdx = Triple.find("-darwin"); if (DarwinDashIdx != std::string::npos) { Triple.resize(DarwinDashIdx + strlen("-darwin")); - - // Only add the major part of the os version. - std::string Version = getOSVersion(); - Triple += Version.substr(0, Version.find('.')); + Triple += getOSVersion(); } return Triple; diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 4312d67..5a57a28 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -124,7 +124,7 @@ bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) { (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY); return KERN_SUCCESS == kr; #else - return false; + return true; #endif } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 0f6e800..430cf2e 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -869,18 +869,18 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { return false; } -const char *Path::MapInFilePages(int FD, uint64_t FileSize) { +const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) { int Flags = MAP_PRIVATE; #ifdef MAP_FILE Flags |= MAP_FILE; #endif - void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, 0); + void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, Offset); if (BasePtr == MAP_FAILED) return 0; return (const char*)BasePtr; } -void Path::UnMapFilePages(const char *BasePtr, uint64_t FileSize) { +void Path::UnMapFilePages(const char *BasePtr, size_t FileSize) { ::munmap((void*)BasePtr, FileSize); } diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 1104bc7..9f0a9ef 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -132,7 +132,7 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { #ifdef HAVE_POSIX_SPAWN static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg, - posix_spawn_file_actions_t &FileActions) { + posix_spawn_file_actions_t *FileActions) { if (Path == 0) // Noop return false; const char *File; @@ -142,7 +142,7 @@ static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg, else File = Path->c_str(); - if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD, + if (int Err = posix_spawn_file_actions_addopen(FileActions, FD, File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666)) return MakeErrMsg(ErrMsg, "Cannot dup2", Err); return false; @@ -185,10 +185,13 @@ Program::Execute(const Path &path, const char **args, const char **envp, // posix_spawn. It is more efficient than fork/exec. 
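Before the posix_spawn changes continue below, it is worth pinning down the Triple rewrite above: getDarwinNumber's hand-rolled "darwin8.5" logic is replaced by a generic getOSVersion that strips the canonical OS name and reads up to three dot-separated components, with EatNumber now consuming arbitrarily many digits. A usage sketch against the new interface (assuming the usual Triple constructor from a triple string):

    #include "llvm/ADT/Triple.h"
    #include <cstdio>

    int main() {
      unsigned Maj, Min, Micro;

      llvm::Triple T("armv7-apple-ios4.2.1");
      T.getOSVersion(Maj, Min, Micro);   // Maj = 4, Min = 2, Micro = 1
      std::printf("%u.%u.%u\n", Maj, Min, Micro);

      llvm::Triple D("x86_64-apple-darwin10");
      D.getOSVersion(Maj, Min, Micro);   // unset parts default to 0
      std::printf("%u.%u.%u\n", Maj, Min, Micro);  // prints 10.0.0
      return 0;
    }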
#ifdef HAVE_POSIX_SPAWN if (memoryLimit == 0) { - posix_spawn_file_actions_t FileActions; - posix_spawn_file_actions_init(&FileActions); + posix_spawn_file_actions_t FileActionsStore; + posix_spawn_file_actions_t *FileActions = 0; if (redirects) { + FileActions = &FileActionsStore; + posix_spawn_file_actions_init(FileActions); + // Redirect stdin/stdout. if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) || RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions)) @@ -200,7 +203,7 @@ Program::Execute(const Path &path, const char **args, const char **envp, } else { // If stdout and stderr should go to the same place, redirect stderr // to the FD already open for stdout. - if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2)) + if (int Err = posix_spawn_file_actions_adddup2(FileActions, 1, 2)) return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err); } } @@ -216,10 +219,11 @@ Program::Execute(const Path &path, const char **args, const char **envp, // Explicitly initialized to prevent what appears to be a valgrind false // positive. pid_t PID = 0; - int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0, + int Err = posix_spawn(&PID, path.c_str(), FileActions, /*attrp*/0, const_cast<char **>(args), const_cast<char **>(envp)); - posix_spawn_file_actions_destroy(&FileActions); + if (FileActions) + posix_spawn_file_actions_destroy(FileActions); if (Err) return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err); @@ -232,7 +236,7 @@ Program::Execute(const Path &path, const char **args, const char **envp, // Create a child process. int child = fork(); switch (child) { - // An error occured: Return to the caller. + // An error occurred: Return to the caller. case -1: MakeErrMsg(ErrMsg, "Couldn't fork"); return false; diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 0a61759..e286869 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines some helpful functions for dealing with the possibility of -// Unix signals occuring while your program is running. +// Unix signals occurring while your program is running. // //===----------------------------------------------------------------------===// @@ -274,6 +274,9 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { #ifdef __APPLE__ +#include <signal.h> +#include <pthread.h> + int raise(int sig) { return pthread_kill(pthread_self(), sig); } @@ -291,9 +294,6 @@ void __assert_rtn(const char *func, abort(); } -#include <signal.h> -#include <pthread.h> - void abort() { raise(SIGABRT); usleep(1000); diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 2c14366..4227844 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -41,41 +41,12 @@ using namespace sys; static std::vector<HMODULE> OpenedHandles; -#ifdef _WIN64 - typedef DWORD64 ModuleBaseType; -#else - typedef ULONG ModuleBaseType; -#endif - extern "C" { -// Use old callback if: -// - Not using Visual Studio -// - Visual Studio 2005 or earlier but only if we are not using the Windows SDK -// or Windows SDK version is older than 6.0 -// Use new callback if: -// - Newer Visual Studio (comes with newer SDK). 
-// - Visual Studio 2005 with Windows SDK 6.0+ -#if defined(_MSC_VER) - #if _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000) - #define OLD_ELM_CALLBACK_DECL 1 - #endif -#elif defined(__MINGW64__) - // Use new callback. -#elif defined(__MINGW32__) - #define OLD_ELM_CALLBACK_DECL 1 -#endif -#ifdef OLD_ELM_CALLBACK_DECL - static BOOL CALLBACK ELM_Callback(PSTR ModuleName, - ModuleBaseType ModuleBase, + static BOOL CALLBACK ELM_Callback(WIN32_ELMCB_PCSTR ModuleName, + ULONG_PTR ModuleBase, ULONG ModuleSize, PVOID UserContext) -#else - static BOOL CALLBACK ELM_Callback(PCSTR ModuleName, - ModuleBaseType ModuleBase, - ULONG ModuleSize, - PVOID UserContext) -#endif { // Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded // into the process. diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 625f67a..42a92f9 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -882,7 +882,17 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { // Find a numeric suffix that isn't used by an existing file. Assume there // won't be more than 1 million files with the same prefix. Probably a safe // bet. - static unsigned FCounter = 0; + static int FCounter = -1; + if (FCounter < 0) { + // Give arbitrary initial seed. + // FIXME: We should use sys::fs::unique_file() in future. + LARGE_INTEGER cnt64; + DWORD x = GetCurrentProcessId(); + x = (x << 16) | (x >> 16); + if (QueryPerformanceCounter(&cnt64)) // RDTSC + x ^= cnt64.HighPart ^ cnt64.LowPart; + FCounter = x % 1000000; + } do { sprintf(FNBuffer+offset, "-%06u", FCounter); if (++FCounter > 999999) @@ -908,12 +918,12 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { } /// MapInFilePages - Not yet implemented on win32. -const char *Path::MapInFilePages(int FD, uint64_t FileSize) { +const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) { return 0; } /// MapInFilePages - Not yet implemented on win32. -void Path::UnMapFilePages(const char *Base, uint64_t FileSize) { +void Path::UnMapFilePages(const char *Base, size_t FileSize) { assert(0 && "NOT IMPLEMENTED"); } diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 8effb0c..af71b73 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -449,7 +449,14 @@ error_code status(const Twine &path, file_status &result) { SmallString<128> path_storage; SmallVector<wchar_t, 128> path_utf16; - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + StringRef path8 = path.toStringRef(path_storage); + // FIXME: We should detect as many "special file name" as possible. + if (path8.compare_lower("nul") == 0) { + result = file_status(file_type::character_file); + return success; + } + + if (error_code ec = UTF8ToUTF16(path8, path_utf16)) return ec; @@ -649,7 +656,7 @@ error_code get_magic(const Twine &path, uint32_t len, ::CloseHandle(file); if (!read_success || (bytes_read != len)) { // Set result size to the number of bytes read if it's valid. - if (bytes_read >= 0 && bytes_read <= len) + if (bytes_read <= len) result.set_size(bytes_read); // ERROR_HANDLE_EOF is mapped to errc::value_too_large. 
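In the Program.inc hunk above, the file-actions object becomes optional: posix_spawn() accepts a null pointer, so the init/destroy pair is only paid when redirections are actually requested. (The PathV2 get_magic change nearby is the classic unsigned pitfall: bytes_read is a DWORD, so bytes_read >= 0 is always true and only the upper-bound check survives.) A minimal spawn sketch with the same shape; the flag and output path are illustrative:

    #include <cstdio>
    #include <fcntl.h>
    #include <spawn.h>
    #include <sys/types.h>
    #include <unistd.h>

    extern char **environ;

    static int spawnEcho(bool wantRedirect) {
      posix_spawn_file_actions_t Store;
      posix_spawn_file_actions_t *Actions = 0;
      if (wantRedirect) {
        // Only build the actions object when a redirection is wanted.
        Actions = &Store;
        posix_spawn_file_actions_init(Actions);
        posix_spawn_file_actions_addopen(Actions, 1, "/tmp/out.txt",
                                         O_WRONLY | O_CREAT, 0666);
      }
      char *argv[] = { (char *)"echo", (char *)"hi", 0 };
      pid_t PID = 0;
      int Err = posix_spawn(&PID, "/bin/echo", Actions, /*attrp=*/0,
                            argv, environ);
      if (Actions)
        posix_spawn_file_actions_destroy(Actions);
      return Err;  // 0 on success, errno-style code otherwise
    }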
return ec; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 80ea740..5a71fa3 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -220,6 +220,36 @@ raw_ostream &raw_ostream::operator<<(const void *P) { } raw_ostream &raw_ostream::operator<<(double N) { +#ifdef _WIN32 + // On MSVCRT and compatible, output of %e is incompatible to Posix + // by default. Number of exponent digits should be at least 2. "%+03d" + // FIXME: Implement our formatter to here or Support/Format.h! + int fpcl = _fpclass(N); + + // negative zero + if (fpcl == _FPCLASS_NZ) + return *this << "-0.000000e+00"; + + char buf[16]; + unsigned len; + len = snprintf(buf, sizeof(buf), "%e", N); + if (len <= sizeof(buf) - 2) { + if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') { + int cs = buf[len - 4]; + if (cs == '+' || cs == '-') { + int c1 = buf[len - 2]; + int c0 = buf[len - 1]; + if (isdigit(c1) && isdigit(c0)) { + // Trim leading '0': "...e+012" -> "...e+12\0" + buf[len - 3] = c1; + buf[len - 2] = c0; + buf[--len] = 0; + } + } + } + return this->operator<<(buf); + } +#endif return this->operator<<(format("%e", N)); } @@ -265,15 +295,23 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { return write(Ptr, Size); } - // Write out the data in buffer-sized blocks until the remainder - // fits within the buffer. - do { - size_t NumBytes = OutBufEnd - OutBufCur; - copy_to_buffer(Ptr, NumBytes); - flush_nonempty(); - Ptr += NumBytes; - Size -= NumBytes; - } while (OutBufCur+Size > OutBufEnd); + size_t NumBytes = OutBufEnd - OutBufCur; + + // If the buffer is empty at this point we have a string that is larger + // than the buffer. Directly write the chunk that is a multiple of the + // preferred buffer size and put the remainder in the buffer. + if (BUILTIN_EXPECT(OutBufCur == OutBufStart, false)) { + size_t BytesToWrite = Size - (Size % NumBytes); + write_impl(Ptr, BytesToWrite); + copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite); + return *this; + } + + // We don't have enough space in the buffer to fit the string in. Insert as + // much as possible, flush and start over with the remainder. + copy_to_buffer(Ptr, NumBytes); + flush_nonempty(); + return write(Ptr + NumBytes, Size - NumBytes); } copy_to_buffer(Ptr, Size); @@ -458,6 +496,14 @@ raw_fd_ostream::~raw_fd_ostream() { } } +#ifdef __MINGW32__ + // On mingw, global dtors should not call exit(). + // report_fatal_error() invokes exit(). We know report_fatal_error() + // might not write messages to stderr when any errors were detected + // on FD == 2. + if (FD == 2) return; +#endif + // If there are any pending errors, report them now. Clients wishing // to avoid report_fatal_error calls should check for errors with // has_error() and clear the error flag with clear_error() before diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c index cd018d5..46c91a9 100644 --- a/lib/Support/regcomp.c +++ b/lib/Support/regcomp.c @@ -780,7 +780,7 @@ p_b_cclass(struct parse *p, cset *cs) const char *u; char c; - while (MORE() && isalpha(PEEK())) + while (MORE() && isalpha((uch)PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index bf4315f..6af5f85 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -51,6 +51,12 @@ def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", // to just not use them. 
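Back in the raw_ostream hunk above: MSVCRT's %e prints at least three exponent digits ("1.0e+012") where POSIX prints two, and the new Windows-only path trims the surplus zero in place. A stand-alone version of the trim:

    #include <cctype>

    // Turn "...e+0dd" / "...e-0dd" into "...e+dd" / "...e-dd" in place.
    static void trimExponent(char *buf, unsigned &len) {
      if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0' &&
          (buf[len - 4] == '+' || buf[len - 4] == '-') &&
          isdigit((unsigned char)buf[len - 2]) &&
          isdigit((unsigned char)buf[len - 1])) {
        buf[len - 3] = buf[len - 2];   // shift the two real digits left
        buf[len - 2] = buf[len - 1];
        buf[--len] = 0;
      }
    }
    // "1.000000e+012" (len 13) becomes "1.000000e+12" (len 12).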
def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", "Disable VFP / NEON MAC instructions">; + +// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. +def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; + // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", @@ -61,6 +67,14 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", "Prefer 32-bit Thumb instrs">; +/// Some instructions update CPSR partially, which can add false dependency for +/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is +/// mapped to a separate physical register. Avoid partial CPSR update for these +/// processors. +def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", + "AvoidCPSRPartialUpdate", "true", + "Avoid CPSR partial update for OOO execution">; + // Multiprocessing extension. def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; @@ -100,11 +114,13 @@ def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others", def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureT2XtPk]>; + FeatureHasSlowFPVMLx, FeatureVMLxForwarding, + FeatureT2XtPk]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", - [FeatureHasSlowFPVMLx, FeatureT2XtPk, - FeatureFP16]>; + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureFP16, + FeatureAvoidPartialCPSR]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> : Processor<Name, GenericItineraries, Features>; @@ -171,6 +187,8 @@ def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, ProcA8]>; def : Processor<"cortex-a9", CortexA9Itineraries, [ArchV7A, ProcA9]>; +def : Processor<"cortex-a9-mp", CortexA9Itineraries, + [ArchV7A, ProcA9, FeatureMP]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [ArchV7M]>; diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 19fbf05..595708f 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -408,16 +408,18 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand is a reg if in // reg/reg form, otherwise it's reg#0. The third field encodes the operation - // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. + // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The + // fourth operand 16-17 encodes the index mode. // // If this addressing mode is a frame index (before prolog/epilog insertion // and code rewriting), this operand will have the form: FI#, reg0, <offs> // with no shift amount for the frame offset. 
// - static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) { + static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, + unsigned IdxMode = 0) { assert(Imm12 < (1 << 12) && "Imm too large!"); bool isSub = Opc == sub; - return Imm12 | ((int)isSub << 12) | (SO << 13); + return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ; } static inline unsigned getAM2Offset(unsigned AM2Opc) { return AM2Opc & ((1 << 12)-1); @@ -426,7 +428,10 @@ namespace ARM_AM { return ((AM2Opc >> 12) & 1) ? sub : add; } static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { - return (ShiftOpc)(AM2Opc >> 13); + return (ShiftOpc)((AM2Opc >> 13) & 7); + } + static inline unsigned getAM2IdxMode(unsigned AM2Opc) { + return (AM2Opc >> 16); } @@ -441,12 +446,14 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand is a reg if in // reg/reg form, otherwise it's reg#0. The third field encodes the operation - // in bit 8, the immediate in bits 0-7. + // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the + // index mode. /// getAM3Opc - This function encodes the addrmode3 opc field. - static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) { + static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, + unsigned IdxMode = 0) { bool isSub = Opc == sub; - return ((int)isSub << 8) | Offset; + return ((int)isSub << 8) | Offset | (IdxMode << 9); } static inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; @@ -454,6 +461,9 @@ namespace ARM_AM { static inline AddrOpc getAM3Op(unsigned AM3Opc) { return ((AM3Opc >> 8) & 1) ? sub : add; } + static inline unsigned getAM3IdxMode(unsigned AM3Opc) { + return (AM3Opc >> 9); + } //===--------------------------------------------------------------------===// // Addressing Mode #4 diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp index ec23449..f062819 100644 --- a/lib/Target/ARM/ARMAsmBackend.cpp +++ b/lib/Target/ARM/ARMAsmBackend.cpp @@ -246,7 +246,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { } uint32_t out = (opc << 21); - out |= (Value & 0x800) << 14; + out |= (Value & 0x800) << 15; out |= (Value & 0x700) << 4; out |= (Value & 0x0FF); @@ -416,21 +416,22 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, // FIXME: This should be in a separate file. class DarwinARMAsmBackend : public ARMAsmBackend { public: - DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { } - - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; + const object::mach::CPUSubtypeARM Subtype; + DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st) + : ARMAsmBackend(T), Subtype(st) { } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - // FIXME: Subtarget info should be derived. Force v7 for now. 
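The ARMAddressingModes.h change above packs a fourth field, the index mode, into the AM2/AM3 descriptors: immediate in bits 0-11, the sub flag in bit 12, the shift opcode in bits 13-15, index mode in bits 16-17. That is why getAM2ShiftOpc now masks with 7 instead of shifting alone. A round-trip sketch of the AM2 packing (illustrative encoder, not the LLVM helper):

    #include <cassert>

    static unsigned encodeAM2(bool isSub, unsigned Imm12, unsigned SO,
                              unsigned IdxMode) {
      assert(Imm12 < (1u << 12) && "Imm too large!");
      return Imm12 | ((unsigned)isSub << 12) | (SO << 13) | (IdxMode << 16);
    }

    int main() {
      unsigned Opc = encodeAM2(true, 0x7B, /*SO=*/2, /*IdxMode=*/1);
      assert((Opc & 0xFFF) == 0x7B);   // immediate
      assert(((Opc >> 12) & 1) == 1);  // sub
      assert(((Opc >> 13) & 7) == 2);  // shift opcode needs the 3-bit mask
      assert(Opc >> 16 == 1);          // pre-indexed
      return 0;
    }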
return createMachObjectWriter(new ARMMachObjectWriter( /*Is64Bit=*/false, object::mach::CTM_ARM, - object::mach::CSARM_V7), + Subtype), OS, /*IsLittleEndian=*/true); } + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { return false; } @@ -499,14 +500,17 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, TargetAsmBackend *llvm::createARMAsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: - return new DarwinARMAsmBackend(T); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - assert(0 && "Windows not supported on ARM"); - default: - return new ELFARMAsmBackend(T, Triple(TT).getOS()); + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) { + if (TheTriple.getArchName() == "armv6" || + TheTriple.getArchName() == "thumbv6") + return new DarwinARMAsmBackend(T, object::mach::CSARM_V6); + return new DarwinARMAsmBackend(T, object::mach::CSARM_V7); } + + if (TheTriple.isOSWindows()) + assert(0 && "Windows not supported on ARM"); + + return new ELFARMAsmBackend(T, Triple(TT).getOS()); } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index db12b8e..c428e18 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -88,6 +88,11 @@ namespace { case ARMBuildAttrs::CPU_name: Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String)); break; + /* GAS requires .fpu to be emitted regardless of EABI attribute */ + case ARMBuildAttrs::Advanced_SIMD_arch: + case ARMBuildAttrs::VFP_arch: + Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String)); + break; default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; } } @@ -167,6 +172,117 @@ getDebugValueLocation(const MachineInstr *MI) const { return Location; } +/// getDwarfRegOpSize - get size required to emit given machine location using +/// dwarf encoding. +unsigned ARMAsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) + return AsmPrinter::getDwarfRegOpSize(MLoc); + else { + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + unsigned Rx = 256 + (SReg >> 1); + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB + // 1 + ULEB(Rx) + 1 + 1 + 1 + return 4 + MCAsmInfo::getULEB128Size(Rx); + } + + if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. + // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) + + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8); + // 6 + ULEB(D1) + ULEB(D2) + return 6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2); + } + } + return 0; +} + +/// EmitDwarfRegOp - Emit dwarf register operation. 
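+ // Worked instance of the S-register rule in getDwarfRegOpSize above:
+ // for S5, SReg = 5 (odd), Rx = 256 + (5 >> 1) = 258, giving
+ //   DW_OP_regx 258 ; DW_OP_bit_piece 32, 32
+ // (the odd register is the second 32-bit piece of D2). ULEB128(258)
+ // takes two bytes, so the size is 1 + 2 + 1 + 1 + 1 = 6 bytes,
+ // matching the 4 + getULEB128Size(Rx) formula.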
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) + AsmPrinter::EmitDwarfRegOp(MLoc); + else { + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + bool odd = SReg & 0x1; + unsigned Rx = 256 + (SReg >> 1); + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB + // 1 + ULEB(Rx) + 1 + 1 + 1 + EmitInt16(4 + MCAsmInfo::getULEB128Size(Rx)); + + OutStreamer.AddComment("DW_OP_regx for S register"); + EmitInt8(dwarf::DW_OP_regx); + + OutStreamer.AddComment(Twine(SReg)); + EmitULEB128(Rx); + + if (odd) { + OutStreamer.AddComment("DW_OP_bit_piece 32 32"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(32); + } else { + OutStreamer.AddComment("DW_OP_bit_piece 32 0"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(0); + } + } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. + // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) + + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8); + // 6 + ULEB(D1) + ULEB(D2) + EmitInt16(6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2)); + + OutStreamer.AddComment("DW_OP_regx for Q register: D1"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D1); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + + OutStreamer.AddComment("DW_OP_regx for Q register: D2"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D2); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + } + } +} + void ARMAsmPrinter::EmitFunctionEntryLabel() { if (AFI->isThumbFunction()) { OutStreamer.EmitAssemblerFlag(MCAF_Code16); @@ -453,10 +569,13 @@ void ARMAsmPrinter::emitAttributes() { emitARMAttributeSection(); + /* GAS expect .fpu to be emitted, regardless of VFP build attribute */ + bool emitFPU = false; AttributeEmitter *AttrEmitter; - if (OutStreamer.hasRawTextSupport()) + if (OutStreamer.hasRawTextSupport()) { AttrEmitter = new AsmAttributeEmitter(OutStreamer); - else { + emitFPU = true; + } else { MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer); AttrEmitter = new ObjectAttributeEmitter(O); } @@ -490,10 +609,36 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::Allowed); } - // FIXME: Emit FPU type - if (Subtarget->hasVFP2()) + if (Subtarget->hasNEON() && emitFPU) { + /* NEON is not exactly a VFP architecture, but GAS emit one of + * neon/vfpv3/vfpv2 for .fpu parameters */ + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); + /* If emitted for NEON, omit from VFP below, since you can have both + * NEON and VFP in build attributes but only one .fpu */ + emitFPU = false; + } + + /* VFPv3 + .fpu */ + if (Subtarget->hasVFP3()) { + 
AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv3A); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3"); + + /* VFPv2 + .fpu */ + } else if (Subtarget->hasVFP2()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, ARMBuildAttrs::AllowFPv2); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2"); + } + + /* TODO: ARMBuildAttrs::Allowed is not completely accurate, + * since NEON can have 1 (allowed) or 2 (fused MAC operations) */ + if (Subtarget->hasNEON()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::Allowed); + } // Signal various FP modes. if (!UnsafeFPMath) { @@ -777,10 +922,161 @@ void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI, OutStreamer.EmitInstruction(TmpInst); } +void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { + assert(MI->getFlag(MachineInstr::FrameSetup) && + "Only instruction which are involved into frame setup code are allowed"); + + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>(); + + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned Opc = MI->getOpcode(); + unsigned SrcReg, DstReg; + + if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) { + // Two special cases: + // 1) tPUSH does not have src/dst regs. + // 2) for Thumb1 code we sometimes materialize the constant via constpool + // load. Yes, this is pretty fragile, but for now I don't see better + // way... :( + SrcReg = DstReg = ARM::SP; + } else { + SrcReg = MI->getOperand(1).getReg(); + DstReg = MI->getOperand(0).getReg(); + } + + // Try to figure out the unwinding opcode out of src / dst regs. + if (MI->getDesc().mayStore()) { + // Register saves. + assert(DstReg == ARM::SP && + "Only stack pointer as a destination reg is supported"); + + SmallVector<unsigned, 4> RegList; + // Skip src & dst reg, and pred ops. + unsigned StartOp = 2 + 2; + // Use all the operands. + unsigned NumOffset = 0; + + switch (Opc) { + default: + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + case ARM::tPUSH: + // Special case here: no src & dst reg, but two extra imp ops. + StartOp = 2; NumOffset = 2; + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + case ARM::VSTMDDB_UPD: + assert(SrcReg == ARM::SP && + "Only stack pointer as a source reg is supported"); + for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset; + i != NumOps; ++i) + RegList.push_back(MI->getOperand(i).getReg()); + break; + case ARM::STR_PRE: + assert(MI->getOperand(2).getReg() == ARM::SP && + "Only stack pointer as a source reg is supported"); + RegList.push_back(SrcReg); + break; + } + OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + } else { + // Changes of stack / frame pointer. 
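+ // Aside, on the emitAttributes change above: in textual assembly mode a
+ // Cortex-A8 build should now print ".cpu cortex-a8" followed by
+ // ".fpu neon"; a VFP3-only target gets ".fpu vfpv3", a VFP2 one
+ // ".fpu vfpv2". GAS accepts a single .fpu directive, which is why
+ // emitting the NEON form suppresses the VFP one, while object emission
+ // skips .fpu and records the numeric build attributes instead.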
+ if (SrcReg == ARM::SP) { + int64_t Offset = 0; + switch (Opc) { + default: + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + case ARM::MOVr: + case ARM::tMOVgpr2gpr: + case ARM::tMOVgpr2tgpr: + Offset = 0; + break; + case ARM::ADDri: + Offset = -MI->getOperand(2).getImm(); + break; + case ARM::SUBri: + case ARM::t2SUBrSPi: + Offset = MI->getOperand(2).getImm(); + break; + case ARM::tSUBspi: + Offset = MI->getOperand(2).getImm()*4; + break; + case ARM::tADDspi: + case ARM::tADDrSPi: + Offset = -MI->getOperand(2).getImm()*4; + break; + case ARM::tLDRpci: { + // Grab the constpool index and check, whether it corresponds to + // original or cloned constpool entry. + unsigned CPI = MI->getOperand(1).getIndex(); + const MachineConstantPool *MCP = MF.getConstantPool(); + if (CPI >= MCP->getConstants().size()) + CPI = AFI.getOriginalCPIdx(CPI); + assert(CPI != -1U && "Invalid constpool index"); + + // Derive the actual offset. + const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry"); + // FIXME: Check for user, it should be "add" instruction! + Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue(); + break; + } + } + + if (DstReg == FramePtr && FramePtr != ARM::SP) + // Set-up of the frame pointer. Positive values correspond to "add" + // instruction. + OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset); + else if (DstReg == ARM::SP) { + // Change of SP by an offset. Positive values correspond to "sub" + // instruction. + OutStreamer.EmitPad(Offset); + } else { + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + } else if (DstReg == ARM::SP) { + // FIXME: .movsp goes here + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + else { + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + } +} + +extern cl::opt<bool> EnableARMEHABI; + void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned Opc = MI->getOpcode(); switch (Opc) { default: break; + case ARM::B: { + // B is just a Bcc with an 'always' predicate. + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(ARM::Bcc); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::LDMIA_RET: { + // LDMIA_RET is just a normal LDMIA_UPD instruction that targets PC and as + // such has additional code-gen properties and scheduling information. + // To emit it, we just construct as normal and set the opcode to LDMIA_UPD. + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(ARM::LDMIA_UPD); + OutStreamer.EmitInstruction(TmpInst); + return; + } case ARM::t2ADDrSPi: case ARM::t2ADDrSPi12: case ARM::t2SUBrSPi: @@ -850,6 +1146,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } + // Darwin call instructions are just normal call instructions with different + // clobber semantics (they clobber R9). 
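+ // The EmitUnwindingInstruction logic above, applied to a typical Thumb
+ // prologue (a sketch, not verbatim compiler output):
+ //   "push {r4, r7, lr}"  ->  .save {r4, r7, lr}
+ //   "add r7, sp, #4"     ->  .setfp r7, sp, #4   (frame pointer setup)
+ //   "sub sp, #8"         ->  .pad #8             (tSUBspi, imm scaled by 4)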
+ case ARM::BLr9: + case ARM::BLr9_pred: + case ARM::BLXr9: + case ARM::BLXr9_pred: { + unsigned newOpc; + switch (Opc) { + default: assert(0); + case ARM::BLr9: newOpc = ARM::BL; break; + case ARM::BLr9_pred: newOpc = ARM::BL_pred; break; + case ARM::BLXr9: newOpc = ARM::BLX; break; + case ARM::BLXr9_pred: newOpc = ARM::BLX_pred; break; + } + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(newOpc); + OutStreamer.EmitInstruction(TmpInst); + return; + } case ARM::BXr9_CALL: case ARM::BX_CALL: { { @@ -1502,6 +1818,49 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } + // Tail jump branches are really just branch instructions with additional + // code-gen attributes. Convert them to the canonical form here. + case ARM::TAILJMPd: + case ARM::TAILJMPdND: { + MCInst TmpInst, TmpInst2; + // Lower the instruction as-is to get the operands properly converted. + LowerARMMachineInstrToMCInst(MI, TmpInst2, *this); + TmpInst.setOpcode(ARM::Bcc); + TmpInst.addOperand(TmpInst2.getOperand(0)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::tTAILJMPd: + case ARM::tTAILJMPdND: { + MCInst TmpInst, TmpInst2; + LowerARMMachineInstrToMCInst(MI, TmpInst2, *this); + TmpInst.setOpcode(ARM::tB); + TmpInst.addOperand(TmpInst2.getOperand(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::TAILJMPrND: + case ARM::tTAILJMPrND: + case ARM::TAILJMPr: + case ARM::tTAILJMPr: { + unsigned newOpc = (Opc == ARM::TAILJMPr || Opc == ARM::TAILJMPrND) + ? ARM::BX : ARM::tBX; + MCInst TmpInst; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + // These are the pseudos created to comply with stricter operand restrictions // on ARMv5. Lower them now to "normal" instructions, since all the // restrictions are already satisfied. @@ -1530,6 +1889,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + + // Emit unwinding stuff for frame-related instructions + if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) + EmitUnwindingInstruction(MI); + OutStreamer.EmitInstruction(TmpInst); } @@ -1538,10 +1902,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { //===----------------------------------------------------------------------===// static MCInstPrinter *createARMMCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI); + return new ARMInstPrinter(TM, MAI); return 0; } diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 5852684..1ee1b70 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -82,11 +82,20 @@ private: // Generic helper used to emit e.g. 
ARMv5 mul pseudos void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc); + void EmitUnwindingInstruction(const MachineInstr *MI); + public: void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + /// getDwarfRegOpSize - get size required to emit given machine location + /// using dwarf encoding. + virtual unsigned getDwarfRegOpSize(const MachineLocation &MLoc) const; + + /// EmitDwarfRegOp - Emit dwarf register operation. + virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const; + virtual unsigned getISAEncoding() { // ARM/Darwin adds ISA to the DWARF info for each function. if (!Subtarget->isTargetDarwin()) diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h index a56cc1a..36edbad 100644 --- a/lib/Target/ARM/ARMBaseInfo.h +++ b/lib/Target/ARM/ARMBaseInfo.h @@ -200,6 +200,59 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { } namespace ARMII { + + /// ARM Index Modes + enum IndexMode { + IndexModeNone = 0, + IndexModePre = 1, + IndexModePost = 2, + IndexModeUpd = 3 + }; + + /// ARM Addressing Modes + enum AddrMode { + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrMode6 = 6, + AddrModeT1_1 = 7, + AddrModeT1_2 = 8, + AddrModeT1_4 = 9, + AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data + AddrModeT2_i12 = 11, + AddrModeT2_i8 = 12, + AddrModeT2_so = 13, + AddrModeT2_pc = 14, // +/- i12 for pc relative data + AddrModeT2_i8s4 = 15, // i8 * 4 + AddrMode_i12 = 16 + }; + + inline static const char *AddrModeToString(AddrMode addrmode) { + switch (addrmode) { + default: llvm_unreachable("Unknown memory operation"); + case AddrModeNone: return "AddrModeNone"; + case AddrMode1: return "AddrMode1"; + case AddrMode2: return "AddrMode2"; + case AddrMode3: return "AddrMode3"; + case AddrMode4: return "AddrMode4"; + case AddrMode5: return "AddrMode5"; + case AddrMode6: return "AddrMode6"; + case AddrModeT1_1: return "AddrModeT1_1"; + case AddrModeT1_2: return "AddrModeT1_2"; + case AddrModeT1_4: return "AddrModeT1_4"; + case AddrModeT1_s: return "AddrModeT1_s"; + case AddrModeT2_i12: return "AddrModeT2_i12"; + case AddrModeT2_i8: return "AddrModeT2_i8"; + case AddrModeT2_so: return "AddrModeT2_so"; + case AddrModeT2_pc: return "AddrModeT2_pc"; + case AddrModeT2_i8s4: return "AddrModeT2_i8s4"; + case AddrMode_i12: return "AddrMode_i12"; + } + } + /// Target Operand Flag enum. 
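The new ARMII enums and AddrModeToString above are mainly useful for diagnostics. A possible debugging use, assuming the definitions above are in scope:

    #include <cstdio>

    static void dumpMode(llvm::ARMII::AddrMode AM,
                         llvm::ARMII::IndexMode IM) {
      std::printf("%s, index mode %u\n",
                  llvm::ARMII::AddrModeToString(AM), (unsigned)IM);
    }
    // dumpMode(ARMII::AddrMode3, ARMII::IndexModePre) prints
    // "AddrMode3, index mode 1".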
enum TOF { //===------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2268e59..44a3976 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1021,7 +1021,7 @@ reMaterialize(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), DestReg) .addConstantPoolIndex(CPI).addImm(PCLabelId); - (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); + MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); break; } } @@ -1080,11 +1080,18 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, int CPI1 = MO1.getIndex(); const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; - ARMConstantPoolValue *ACPV0 = - static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); - ARMConstantPoolValue *ACPV1 = - static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); - return ACPV0->hasSameValue(ACPV1); + bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); + bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); + if (isARMCP0 && isARMCP1) { + ARMConstantPoolValue *ACPV0 = + static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); + ARMConstantPoolValue *ACPV1 = + static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); + return ACPV0->hasSameValue(ACPV1); + } else if (!isARMCP0 && !isARMCP1) { + return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; + } + return false; } else if (Opcode == ARM::PICLDR) { if (MI1->getOpcode() != Opcode) return false; @@ -1194,7 +1201,7 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, } /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to -/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should +/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. This function takes two integers that represent the load offsets @@ -1263,19 +1270,19 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, } bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, + unsigned NumCycles, unsigned ExtraPredCycles, float Probability, float Confidence) const { - if (!NumCyles) + if (!NumCycles) return false; // Attempt to estimate the relative costs of predication versus branching. - float UnpredCost = Probability * NumCyles; + float UnpredCost = Probability * NumCycles; UnpredCost += 1.0; // The branch itself UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); - return (float)(NumCyles + ExtraPredCycles) < UnpredCost; + return (float)(NumCycles + ExtraPredCycles) < UnpredCost; } bool ARMBaseInstrInfo:: @@ -1328,7 +1335,7 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII) { + const ARMBaseInstrInfo &TII, unsigned MIFlags) { bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -1346,7 +1353,8 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = isSub ? 
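+ // Worked example for isProfitableToIfCvt above (hypothetical numbers):
+ // with NumCycles = 2, ExtraPredCycles = 1, Probability = 0.4,
+ // Confidence = 0.8 and a misprediction penalty of 8 cycles,
+ //   UnpredCost = 0.4 * 2 + 1.0 + (1.0 - 0.8) * 8 = 3.4
+ // while the predicated cost is 2 + 1 = 3; since 3 < 3.4, the block is
+ // if-converted.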
ARM::SUBri : ARM::ADDri; BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) .addReg(BaseReg, RegState::Kill).addImm(ThisVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); BaseReg = DestReg; } } @@ -1610,18 +1618,84 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Set the "zero" bit in CPSR. switch (MI->getOpcode()) { default: break; + case ARM::RSBrr: + case ARM::RSBri: + case ARM::RSCrr: + case ARM::RSCri: + case ARM::ADDrr: case ARM::ADDri: - case ARM::ANDri: - case ARM::t2ANDri: + case ARM::ADCrr: + case ARM::ADCri: + case ARM::SUBrr: case ARM::SUBri: + case ARM::SBCrr: + case ARM::SBCri: + case ARM::t2RSBri: + case ARM::t2ADDrr: case ARM::t2ADDri: + case ARM::t2ADCrr: + case ARM::t2ADCri: + case ARM::t2SUBrr: case ARM::t2SUBri: + case ARM::t2SBCrr: + case ARM::t2SBCri: + case ARM::ANDrr: + case ARM::ANDri: + case ARM::t2ANDrr: + case ARM::t2ANDri: + case ARM::ORRrr: + case ARM::ORRri: + case ARM::t2ORRrr: + case ARM::t2ORRri: + case ARM::EORrr: + case ARM::EORri: + case ARM::t2EORrr: + case ARM::t2EORri: { + // Scan forward for the use of CPSR, if it's a conditional code requires + // checking of V bit, then this is not safe to do. If we can't find the + // CPSR use (i.e. used in another block), then it's not safe to perform + // the optimization. + bool isSafe = false; + I = CmpInstr; + E = MI->getParent()->end(); + while (!isSafe && ++I != E) { + const MachineInstr &Instr = *I; + for (unsigned IO = 0, EO = Instr.getNumOperands(); + !isSafe && IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg() || MO.getReg() != ARM::CPSR) + continue; + if (MO.isDef()) { + isSafe = true; + break; + } + // Condition code is after the operand before CPSR. + ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + switch (CC) { + default: + isSafe = true; + break; + case ARMCC::VS: + case ARMCC::VC: + case ARMCC::GE: + case ARMCC::LT: + case ARMCC::GT: + case ARMCC::LE: + return false; + } + } + } + + if (!isSafe) + return false; + // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); CmpInstr->eraseFromParent(); return true; } + } return false; } @@ -1741,9 +1815,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, llvm_unreachable("Unexpected multi-uops instruction!"); break; case ARM::VLDMQIA: - case ARM::VLDMQDB: case ARM::VSTMQIA: - case ARM::VSTMQDB: return 2; // The number of uOps for load / store multiple are determined by the number @@ -1757,19 +1829,15 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. 
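+ // Back in OptimizeCompareInstr above: folding "cmp rX, #0" into the
+ // S-bit form of the defining instruction preserves N and Z (they match
+ // a compare of the result against zero) but not V, which then reflects
+ // the arithmetic itself. Hence the fold is safe ahead of beq/bne, while
+ // a following bge/blt/bgt/ble/bvs/bvc, whose conditions read V, makes
+ // the scan above reject the transformation.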
case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VLDMDIA_UPD: case ARM::VLDMDDB_UPD: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: case ARM::VSTMDIA: - case ARM::VSTMDDB: case ARM::VSTMDIA_UPD: case ARM::VSTMDDB_UPD: case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); @@ -1859,7 +1927,6 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, switch (DefTID.getOpcode()) { default: break; case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: isSLoad = true; @@ -1935,7 +2002,6 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, switch (UseTID.getOpcode()) { default: break; case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: isSStore = true; @@ -2006,11 +2072,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VLDMDIA_UPD: case ARM::VLDMDDB_UPD: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); @@ -2049,11 +2113,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; case ARM::VSTMDIA: - case ARM::VSTMDDB: case ARM::VSTMDIA_UPD: case ARM::VSTMDDB_UPD: case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); @@ -2160,6 +2222,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } + if (DefAlign < 8 && Subtarget.isCortexA9()) + switch (DefTID.getOpcode()) { + default: break; + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2d8_UPD: + case ARM::VLD2d16_UPD: + case ARM::VLD2d32_UPD: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD1d64T: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD1d64Q: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16: + case ARM::VLD1DUPq32: + case ARM::VLD1DUPq8_UPD: + case ARM::VLD1DUPq16_UPD: + case ARM::VLD1DUPq32_UPD: + case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16: + case ARM::VLD2DUPd32: + case ARM::VLD2DUPd8_UPD: + case ARM::VLD2DUPd16_UPD: + case ARM::VLD2DUPd32_UPD: + case ARM::VLD4DUPd8: + case ARM::VLD4DUPd16: + case ARM::VLD4DUPd32: + case ARM::VLD4DUPd8_UPD: + case ARM::VLD4DUPd16_UPD: + case ARM::VLD4DUPd32_UPD: + case ARM::VLD1LNd8: + case ARM::VLD1LNd16: + case ARM::VLD1LNd32: + case ARM::VLD1LNd8_UPD: + case ARM::VLD1LNd16_UPD: + case ARM::VLD1LNd32_UPD: + case ARM::VLD2LNd8: + case ARM::VLD2LNd16: + case ARM::VLD2LNd32: + case ARM::VLD2LNq16: + case ARM::VLD2LNq32: + case ARM::VLD2LNd8_UPD: + case ARM::VLD2LNd16_UPD: + case 
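The comment preceding this switch quotes the micro-op formula it implements for VFP/NEON load/store multiple: one micro-op per pair of registers, one more for an odd leftover register, plus a fixed one-op overhead. As a tiny standalone function:

    // The "(#reg / 2) + (#reg % 2) + 1" formula quoted above for
    // VFP/NEON load/store multiple.
    unsigned vfpLdStMultipleUops(unsigned NumRegs) {
      return NumRegs / 2 + NumRegs % 2 + 1;
    }
    // e.g. a vldmia of 5 D registers -> 2 + 1 + 1 = 4 micro-ops.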
ARM::VLD2LNd32_UPD: + case ARM::VLD2LNq16_UPD: + case ARM::VLD2LNq32_UPD: + case ARM::VLD4LNd8: + case ARM::VLD4LNd16: + case ARM::VLD4LNd32: + case ARM::VLD4LNq16: + case ARM::VLD4LNq32: + case ARM::VLD4LNd8_UPD: + case ARM::VLD4LNd16_UPD: + case ARM::VLD4LNd32_UPD: + case ARM::VLD4LNq16_UPD: + case ARM::VLD4LNq32_UPD: + // If the address is not 64-bit aligned, the latencies of these + // instructions increases by one. + ++Latency; + break; + } + return Latency; } @@ -2226,6 +2383,113 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } + if (DefAlign < 8 && Subtarget.isCortexA9()) + switch (DefTID.getOpcode()) { + default: break; + case ARM::VLD1q8Pseudo: + case ARM::VLD1q16Pseudo: + case ARM::VLD1q32Pseudo: + case ARM::VLD1q64Pseudo: + case ARM::VLD1q8Pseudo_UPD: + case ARM::VLD1q16Pseudo_UPD: + case ARM::VLD1q32Pseudo_UPD: + case ARM::VLD1q64Pseudo_UPD: + case ARM::VLD2d8Pseudo: + case ARM::VLD2d16Pseudo: + case ARM::VLD2d32Pseudo: + case ARM::VLD2q8Pseudo: + case ARM::VLD2q16Pseudo: + case ARM::VLD2q32Pseudo: + case ARM::VLD2d8Pseudo_UPD: + case ARM::VLD2d16Pseudo_UPD: + case ARM::VLD2d32Pseudo_UPD: + case ARM::VLD2q8Pseudo_UPD: + case ARM::VLD2q16Pseudo_UPD: + case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD3d8Pseudo: + case ARM::VLD3d16Pseudo: + case ARM::VLD3d32Pseudo: + case ARM::VLD1d64TPseudo: + case ARM::VLD3d8Pseudo_UPD: + case ARM::VLD3d16Pseudo_UPD: + case ARM::VLD3d32Pseudo_UPD: + case ARM::VLD1d64TPseudo_UPD: + case ARM::VLD3q8Pseudo_UPD: + case ARM::VLD3q16Pseudo_UPD: + case ARM::VLD3q32Pseudo_UPD: + case ARM::VLD3q8oddPseudo: + case ARM::VLD3q16oddPseudo: + case ARM::VLD3q32oddPseudo: + case ARM::VLD3q8oddPseudo_UPD: + case ARM::VLD3q16oddPseudo_UPD: + case ARM::VLD3q32oddPseudo_UPD: + case ARM::VLD4d8Pseudo: + case ARM::VLD4d16Pseudo: + case ARM::VLD4d32Pseudo: + case ARM::VLD1d64QPseudo: + case ARM::VLD4d8Pseudo_UPD: + case ARM::VLD4d16Pseudo_UPD: + case ARM::VLD4d32Pseudo_UPD: + case ARM::VLD1d64QPseudo_UPD: + case ARM::VLD4q8Pseudo_UPD: + case ARM::VLD4q16Pseudo_UPD: + case ARM::VLD4q32Pseudo_UPD: + case ARM::VLD4q8oddPseudo: + case ARM::VLD4q16oddPseudo: + case ARM::VLD4q32oddPseudo: + case ARM::VLD4q8oddPseudo_UPD: + case ARM::VLD4q16oddPseudo_UPD: + case ARM::VLD4q32oddPseudo_UPD: + case ARM::VLD1DUPq8Pseudo: + case ARM::VLD1DUPq16Pseudo: + case ARM::VLD1DUPq32Pseudo: + case ARM::VLD1DUPq8Pseudo_UPD: + case ARM::VLD1DUPq16Pseudo_UPD: + case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD2DUPd8Pseudo: + case ARM::VLD2DUPd16Pseudo: + case ARM::VLD2DUPd32Pseudo: + case ARM::VLD2DUPd8Pseudo_UPD: + case ARM::VLD2DUPd16Pseudo_UPD: + case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD4DUPd8Pseudo: + case ARM::VLD4DUPd16Pseudo: + case ARM::VLD4DUPd32Pseudo: + case ARM::VLD4DUPd8Pseudo_UPD: + case ARM::VLD4DUPd16Pseudo_UPD: + case ARM::VLD4DUPd32Pseudo_UPD: + case ARM::VLD1LNq8Pseudo: + case ARM::VLD1LNq16Pseudo: + case ARM::VLD1LNq32Pseudo: + case ARM::VLD1LNq8Pseudo_UPD: + case ARM::VLD1LNq16Pseudo_UPD: + case ARM::VLD1LNq32Pseudo_UPD: + case ARM::VLD2LNd8Pseudo: + case ARM::VLD2LNd16Pseudo: + case ARM::VLD2LNd32Pseudo: + case ARM::VLD2LNq16Pseudo: + case ARM::VLD2LNq32Pseudo: + case ARM::VLD2LNd8Pseudo_UPD: + case ARM::VLD2LNd16Pseudo_UPD: + case ARM::VLD2LNd32Pseudo_UPD: + case ARM::VLD2LNq16Pseudo_UPD: + case ARM::VLD2LNq32Pseudo_UPD: + case ARM::VLD4LNd8Pseudo: + case ARM::VLD4LNd16Pseudo: + case ARM::VLD4LNd32Pseudo: + case ARM::VLD4LNq16Pseudo: + case ARM::VLD4LNq32Pseudo: + case ARM::VLD4LNd8Pseudo_UPD: + case ARM::VLD4LNd16Pseudo_UPD: + case 
ARM::VLD4LNd32Pseudo_UPD: + case ARM::VLD4LNq16Pseudo_UPD: + case ARM::VLD4LNq32Pseudo_UPD: + // If the address is not 64-bit aligned, the latencies of these + // instructions increases by one. + ++Latency; + break; + } + return Latency; } @@ -2264,9 +2528,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, default: return ItinData->getStageLatency(get(Opcode).getSchedClass()); case ARM::VLDMQIA: - case ARM::VLDMQDB: case ARM::VSTMQIA: - case ARM::VSTMQDB: return 2; } } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 7e2183d..9a2faf8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -34,25 +34,7 @@ namespace ARMII { //===------------------------------------------------------------------===// // This four-bit field describes the addressing mode used. - - AddrModeMask = 0x1f, - AddrModeNone = 0, - AddrMode1 = 1, - AddrMode2 = 2, - AddrMode3 = 3, - AddrMode4 = 4, - AddrMode5 = 5, - AddrMode6 = 6, - AddrModeT1_1 = 7, - AddrModeT1_2 = 8, - AddrModeT1_4 = 9, - AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data - AddrModeT2_i12 = 11, - AddrModeT2_i8 = 12, - AddrModeT2_so = 13, - AddrModeT2_pc = 14, // +/- i12 for pc relative data - AddrModeT2_i8s4 = 15, // i8 * 4 - AddrMode_i12 = 16, + AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h // Size* - Flags to keep track of the size of an instruction. SizeShift = 5, @@ -64,11 +46,9 @@ namespace ARMII { // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load // and store ops only. Generic "updating" flag is used for ld/st multiple. + // The index mode enums are declared in ARMBaseInfo.h IndexModeShift = 8, IndexModeMask = 3 << IndexModeShift, - IndexModePre = 1, - IndexModePost = 2, - IndexModeUpd = 3, //===------------------------------------------------------------------===// // Instruction encoding formats. @@ -311,7 +291,7 @@ public: int64_t &Offset1, int64_t &Offset2)const; /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to - /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. 
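Both long VLD case lists above (the real instructions, then their Pseudo forms) apply the same one-line adjustment spelled out in their trailing comments: on Cortex-A9, a NEON vector load whose address is not known to be 64-bit aligned costs one extra cycle. Reduced to a standalone helper, with DefAlign in bytes as in the surrounding code:

    // Cortex-A9 adjustment from the two hunks above: NEON vector loads
    // take one extra cycle when the address is under 8-byte alignment.
    unsigned adjustA9VLDLatency(unsigned Latency, unsigned DefAlignBytes,
                                bool IsCortexA9, bool IsNEONVectorLoad) {
      if (IsCortexA9 && IsNEONVectorLoad && DefAlignBytes < 8)
        ++Latency;
      return Latency;
    }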
This function takes two integers that represent the load offsets @@ -327,7 +307,7 @@ public: const MachineFunction &MF) const; virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, unsigned ExtraPredCycles, + unsigned NumCycles, unsigned ExtraPredCycles, float Prob, float Confidence) const; virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, @@ -337,10 +317,10 @@ public: float Probability, float Confidence) const; virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, + unsigned NumCycles, float Probability, float Confidence) const { - return NumCyles == 1; + return NumCycles == 1; } /// AnalyzeCompare - For a comparison instruction, return the source register @@ -496,19 +476,19 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII); + const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII); + const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl); + unsigned MIFlags = 0); /// rewriteARMFrameIndex / rewriteT2FrameIndex - diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 67a4b7d..ea1f08a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -88,7 +88,7 @@ BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - // FIXME: avoid re-calculating this everytime. + // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); @@ -342,12 +342,51 @@ ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, return false; } +const TargetRegisterClass* +ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) + const { + const TargetRegisterClass *Super = RC; + TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + do { + switch (Super->getID()) { + case ARM::GPRRegClassID: + case ARM::SPRRegClassID: + case ARM::DPRRegClassID: + case ARM::QPRRegClassID: + case ARM::QQPRRegClassID: + case ARM::QQQQPRRegClassID: + return Super; + } + Super = *I++; + } while (Super); + return RC; +} const TargetRegisterClass * ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; } +unsigned +ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + switch (RC->getID()) { + default: + return 0; + case ARM::tGPRRegClassID: + return TFI->hasFP(MF) ? 4 : 5; + case ARM::GPRRegClassID: { + unsigned FP = TFI->hasFP(MF) ? 1 : 0; + return 10 - FP - (STI.isR9Reserved() ? 1 : 0); + } + case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
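The getRegPressureLimit hook introduced above (its switch concludes at the start of the next block of text) gives the allocator a per-class budget: Thumb1 low registers lose one slot when a frame pointer is in use, and GPR additionally loses a slot when R9 is reserved. A standalone restatement, with booleans standing in for the frame-lowering and subtarget queries:

    enum PressureClass { TGPR, GPR, SPR_DPR, Other };

    // Restatement of getRegPressureLimit above; the constants mirror
    // the hunk.
    unsigned regPressureLimit(PressureClass RC, bool HasFP, bool R9Reserved) {
      switch (RC) {
      case TGPR:    return HasFP ? 4 : 5;
      case GPR:     return 10 - (HasFP ? 1 : 0) - (R9Reserved ? 1 : 0);
      case SPR_DPR: return 32 - 10; // SPR and DPR share this figure
      default:      return 0;       // unhandled classes report no limit
      }
    }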
+ case ARM::DPRRegClassID: + return 32 - 10; + } +} + /// getAllocationOrder - Returns the register allocation order for a specified /// register class in the form of a pair of TargetRegisterClass iterators. std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> @@ -428,6 +467,10 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 }; + // We only support even/odd hints for GPR and rGPR. + if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass) + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); if (HintType == ARMRI::RegPairEven) { if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) @@ -530,6 +573,29 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } } +bool +ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + // CortexA9 has a Write-after-write hazard for NEON registers. + if (!STI.isCortexA9()) + return false; + + switch (RC->getID()) { + case ARM::DPRRegClassID: + case ARM::DPR_8RegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::QPRRegClassID: + case ARM::QPR_8RegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::SPRRegClassID: + case ARM::SPR_8RegClassID: + // Avoid reusing S, D, and Q registers. + // Don't increase register pressure for QQ and QQQQ. + return true; + default: + return false; + } +} + bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -806,7 +872,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred, - unsigned PredReg) const { + unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = @@ -816,7 +882,8 @@ emitLoadConstPool(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp)) .addReg(DestReg, getDefRegState(true), SubIdx) .addConstantPoolIndex(Idx) - .addImm(0).addImm(Pred).addReg(PredReg); + .addImm(0).addImm(Pred).addReg(PredReg) + .setMIFlags(MIFlags); } bool ARMBaseRegisterInfo:: diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index ba6bd2b..9edf72d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -128,6 +128,12 @@ public: const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> getAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, @@ -139,6 +145,8 @@ public: void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; @@ -176,7 +184,8 @@ public: unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) const; + unsigned PredReg = 0, + unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods... 
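The avoidWriteAfterWrite hook above encodes a Cortex-A9 scheduling quirk its own comments describe: back-to-back writes to the same NEON register stall, so the allocator is asked not to immediately reuse S, D, or Q registers, while the wide QQ/QQQQ tuple classes are deliberately exempt to avoid inflating register pressure. A boolean restatement:

    enum VecClass { SPRClass, DPRClass, QPRClass, QQPRClass, QQQQPRClass };

    // Restatement of avoidWriteAfterWrite above: rotate S/D/Q registers
    // on Cortex-A9 to dodge its NEON write-after-write hazard, but
    // leave the tuple classes alone.
    bool avoidWriteAfterWrite(bool IsCortexA9, VecClass RC) {
      if (!IsCortexA9)
        return false;
      return RC == SPRClass || RC == DPRClass || RC == QPRClass;
    }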
virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 426ba13..d2981c0 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -22,6 +22,9 @@ class CCIfAlign<string Align, CCAction A>: //===----------------------------------------------------------------------===// def CC_ARM_APCS : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + CCIfType<[i8, i16], CCPromoteToType<i32>>, // Handle all vector types as either f64 or v2f64. diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 9bbf6a0..fa73716 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -312,6 +312,15 @@ namespace { unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the /// machine operand requires relocation, record the relocation and return /// zero. @@ -969,7 +978,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, const TargetInstrDesc &TID) const { - for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ + for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) return 1 << ARMII::S_BitShift; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 13d1b33..baf95a3 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1650,24 +1650,27 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; unsigned DestOffset = BBOffsets[DestBB->getNumber()]; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { - MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI; - if (CmpMI->getOpcode() == ARM::tCMPi8) { - unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = llvm::getInstrPredicate(CmpMI, PredReg); - if (Pred == ARMCC::AL && - CmpMI->getOperand(1).getImm() == 0 && - isARMLowRegister(Reg)) { - MachineBasicBlock *MBB = Br.MI->getParent(); - MachineInstr *NewBR = - BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) - .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); - CmpMI->eraseFromParent(); - Br.MI->eraseFromParent(); - Br.MI = NewBR; - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); - ++NumCBZ; - MadeChange = true; + MachineBasicBlock::iterator CmpMI = Br.MI; + if (CmpMI != Br.MI->getParent()->begin()) { + --CmpMI; + if (CmpMI->getOpcode() == ARM::tCMPi8) { + unsigned Reg = CmpMI->getOperand(0).getReg(); + Pred = llvm::getInstrPredicate(CmpMI, PredReg); + if (Pred == ARMCC::AL && + CmpMI->getOperand(1).getImm() == 0 && + isARMLowRegister(Reg)) { + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineInstr *NewBR = + BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) + 
.addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags()); + CmpMI->eraseFromParent(); + Br.MI->eraseFromParent(); + Br.MI = NewBR; + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumCBZ; + MadeChange = true; + } } } } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index bd753d2..b6b3c75 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -455,6 +455,10 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + + // Transfer memoperands. + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MI.eraseFromParent(); } @@ -496,10 +500,13 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) - // Add an implicit kill for the super-reg. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the super-reg. + MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); + + // Transfer memoperands. + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MI.eraseFromParent(); } @@ -622,9 +629,8 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) - // Add an implicit kill for the super-reg. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the super-reg. + MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); } @@ -655,8 +661,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); LO16 = LO16.addImm(SOImmValV1); HI16 = HI16.addImm(SOImmValV2); - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg).addReg(0); HI16.addImm(Pred).addReg(PredReg).addReg(0); TransferImpOps(MI, LO16, HI16); @@ -692,8 +698,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); } - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); @@ -708,6 +714,78 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, switch (Opcode) { default: return false; + case ARM::VMOVScc: + case ARM::VMOVDcc: { + unsigned newOpc = Opcode == ARM::VMOVScc ? 
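The constant-island hunk completed above rewrites a "cmp rN, #0" plus conditional branch into Thumb2 cbz/cbnz; the added guard merely ensures there really is an instruction before the branch to inspect. The eligibility tests, collected into one standalone predicate (the 126-byte bound reflects cbz/cbnz encoding only short forward offsets):

    // Conditions under which the hunk above forms cbz/cbnz: a tCMPi8
    // against zero on a low register with no predicate, branching
    // forward by at most 126 bytes.
    bool canUseCBZ(bool CmpIsTCMPi8, int CmpImm, bool RegIsLow, bool PredIsAL,
                   unsigned BrOffset, unsigned DestOffset) {
      return CmpIsTCMPi8 && CmpImm == 0 && RegIsLow && PredIsAL &&
             BrOffset < DestOffset && (DestOffset - BrOffset) <= 126;
    }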
ARM::VMOVS : ARM::VMOVD; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), + MI.getOperand(1).getReg()) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()); + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCr: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr), + MI.getOperand(1).getReg()) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCs: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + (MI.getOperand(1).getReg())) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addReg(MI.getOperand(3).getReg(), + getKillRegState(MI.getOperand(3).isKill())) + .addImm(MI.getOperand(4).getImm()) + .addImm(MI.getOperand(5).getImm()) // 'pred' + .addReg(MI.getOperand(6).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCi16: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()); + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCi: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MVNCCi: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = @@ -726,9 +804,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, ARMCC::AL, 0, *TII); } else if (AFI->isThumbFunction()) { - llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6, - FramePtr, -NumBytes, - *TII, RI, MI.getDebugLoc()); + llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, *TII, RI); } else { llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, ARMCC::AL, 0, @@ -785,7 +862,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, TII->get(ARM::BL)) .addExternalSymbol("__aeabi_read_tp", 0); - (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; @@ -800,7 +877,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) .addOperand(MI.getOperand(1))); - (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) @@ -823,7 +900,7 @@ bool 
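The block of VMOVcc/MOVCC/MVNCC cases added above expands select-like pseudos into ordinary predicated instructions. Semantically each one is "keep the existing value unless the condition holds"; a scalar model of the MOVCCi case, ignoring the tied-operand plumbing the real expansion relies on:

    // Scalar model of the MOVCCi expansion above: a predicated MOVi
    // writes the destination only when its condition holds, so the
    // register otherwise keeps the "false" value it already contains.
    unsigned movcci(unsigned FalseVal, unsigned Imm, bool CondHolds) {
      unsigned R = FalseVal;
      if (CondHolds)
        R = Imm;
      return R;
    }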
ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); unsigned TF = MO1.getTargetFlags(); - bool isARM = Opcode != ARM::t2MOV_ga_pcrel; + bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode != ARM::t2MOV_ga_dyn); bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn); unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel; @@ -856,7 +933,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (isARM) { AddDefaultPred(MIB3); if (Opcode == ARM::MOV_ga_pcrel_ldr) - (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } TransferImpOps(MI, MIB1, MIB3); MI.eraseFromParent(); @@ -896,9 +973,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VLDMQIA: - case ARM::VLDMQDB: { - unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB; + case ARM::VLDMQIA: { + unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; @@ -927,9 +1003,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VSTMQIA: - case ARM::VSTMQDB: { - unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB; + case ARM::VSTMQIA: { + unsigned NewOpc = ARM::VSTMDIA; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; @@ -950,9 +1025,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); MIB.addReg(D0).addReg(D1); - if (SrcIsKill) - // Add an implicit kill for the Q register. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the Q register. + MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); @@ -960,14 +1034,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::VDUPfqf: case ARM::VDUPfdf:{ - unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd; + unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q : + ARM::VDUPLN32d; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, - Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); + Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, + &ARM::DPR_VFP2RegClass); // The lane is [0,1] for the containing DReg superregister. // Copy the dst/src register operands. 
MIB.addOperand(MI.getOperand(OpIdx++)); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 26f48b3..3baf274 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" #include "ARMRegisterInfo.h" @@ -26,6 +27,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -115,6 +117,11 @@ class ARMFastISel : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); + virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill); virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -123,14 +130,18 @@ class ARMFastISel : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm); - virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm); virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); + virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2); + virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx); @@ -193,6 +204,7 @@ class ARMFastISel : public FastISel { // OptionalDef handling routines. private: + bool isARMNEONPred(const MachineInstr *MI); bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); void AddLoadStoreOperands(EVT VT, Address &Addr, @@ -221,6 +233,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { return true; } +bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { + const TargetInstrDesc &TID = MI->getDesc(); + + // If we're a thumb2 or not NEON function we were handled via isPredicable. + if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || + AFI->isThumb2Function()) + return false; + + for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) + if (TID.OpInfo[i].isPredicate()) + return true; + + return false; +} + // If the machine is predicable go ahead and add the predicate operands, if // it needs default CC operands add those. // TODO: If we want to support thumb1 then we'll need to deal with optional @@ -230,8 +257,10 @@ const MachineInstrBuilder & ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { MachineInstr *MI = &*MIB; - // Do we use a predicate? - if (TII.isPredicable(MI)) + // Do we use a predicate? or... + // Are we NEON in ARM mode and have a predicate operand? If so, I know + // we're not predicable but add it anyways. + if (TII.isPredicable(MI) || isARMNEONPred(MI)) AddDefaultPred(MIB); // Do we optionally set a predicate? 
Preds is size > 0 iff the predicate @@ -296,6 +325,31 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } +unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -384,6 +438,26 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } +unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(Imm1).addImm(Imm2)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { @@ -667,24 +741,29 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { TmpOffset += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - SmallVector<const Value *, 4> Worklist; - Worklist.push_back(Op); - do { - Op = Worklist.pop_back_val(); + for (;;) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; - } else if (isa<AddOperator>(Op) && - isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { - // An add with a constant operand. Fold the constant. + break; + } + if (isa<AddOperator>(Op) && + (!isa<Instruction>(Op) || + FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()] + == FuncInfo.MBB) && + isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { + // An add (in the same block) with a constant operand. Fold the + // constant. ConstantInt *CI = - cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; - // Add the other operand back to the work list. - Worklist.push_back(cast<AddOperator>(Op)->getOperand(0)); - } else - goto unsupported_gep; - } while (!Worklist.empty()); + // Iterate on the other operand. 
+ Op = cast<AddOperator>(Op)->getOperand(0); + continue; + } + // Unsupported + goto unsupported_gep; + } } } @@ -767,26 +846,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { // Since the offset is too large for the load/store instruction // get the reg+offset into a register. if (needsLowering) { - ARMCC::CondCodes Pred = ARMCC::AL; - unsigned PredReg = 0; - - TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass : - ARM::GPRRegisterClass; - unsigned BaseReg = createResultReg(RC); - - if (!isThumb) - emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - BaseReg, Addr.Base.Reg, Addr.Offset, - Pred, PredReg, - static_cast<const ARMBaseInstrInfo&>(TII)); - else { - assert(AFI->isThumb2Function()); - emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg, - static_cast<const ARMBaseInstrInfo&>(TII)); - } + Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, + /*Op0IsKill*/false, Addr.Offset, MVT::i32); Addr.Offset = 0; - Addr.Base.Reg = BaseReg; } } @@ -797,7 +859,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, if (VT.getSimpleVT().SimpleTy == MVT::f32 || VT.getSimpleVT().SimpleTy == MVT::f64) Addr.Offset /= 4; - + // Frame base works a bit differently. Handle it separately. if (Addr.BaseType == Address::FrameIndexBase) { int FI = Addr.Base.FI; @@ -819,7 +881,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, } else { // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - + // ARM halfword load/stores need an additional operand. if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); @@ -1007,18 +1069,16 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // behavior. // TODO: Factor this out. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - MVT VT; - const Type *Ty = CI->getOperand(0)->getType(); - if (!isTypeLegal(Ty, VT)) - return false; - + MVT SourceVT; + const Type *Ty = CI->getOperand(0)->getType(); + if (CI->hasOneUse() && (CI->getParent() == I->getParent()) + && isTypeLegal(Ty, SourceVT)) { bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); if (isFloat && !Subtarget->hasVFP2()) return false; unsigned CmpOpc; - switch (VT.SimpleTy) { + switch (SourceVT.SimpleTy) { default: return false; // TODO: Verify compares. case MVT::f32: @@ -1033,7 +1093,14 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { } // Get the compare predicate. - ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); + // Try to take advantage of fallthrough opportunities. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + Predicate = CmpInst::getInversePredicate(Predicate); + } + + ARMCC::CondCodes ARMPred = getComparePred(Predicate); // We may not handle every CC for now. if (ARMPred == ARMCC::AL) return false; @@ -1061,19 +1128,55 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { FuncInfo.MBB->addSuccessor(TBB); return true; } + } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { + MVT SourceVT; + if (TI->hasOneUse() && TI->getParent() == I->getParent() && + (isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { + unsigned TstOpc = isThumb ? 
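Stepping back to the ARMComputeAddress hunk whose loop closes just above: the worklist is replaced by a simple iteration that folds constant-offset adds into the running GEP offset, and the new same-block test keeps FastISel from folding values whose definitions live in another block. A standalone model of the folding loop, with a hand-rolled Expr type standing in for LLVM's Value hierarchy:

    #include <cstdint>

    // Model of the reworked folding loop above: accumulate constants
    // (scaled by the indexed type's size) along a chain of "add x, C"
    // nodes, stopping at the first unsupported shape.
    struct Expr {
      bool IsConstInt;      // a plain constant index
      int64_t ConstVal;
      bool IsAddConstRHS;   // an "add x, C" expression
      Expr *LHS;
      int64_t RHS;
    };

    int64_t foldConstOffsets(const Expr *Op, int64_t Scale, int64_t Offset) {
      for (;;) {
        if (Op->IsConstInt) {
          Offset += Op->ConstVal * Scale;  // constant-offset addressing
          break;
        }
        if (Op->IsAddConstRHS) {
          Offset += Op->RHS * Scale;       // fold the constant operand
          Op = Op->LHS;                    // iterate on the other operand
          continue;
        }
        break;                             // unsupported: caller bails out
      }
      return Offset;
    }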
ARM::t2TSTri : ARM::TSTri; + unsigned OpReg = getRegForValue(TI->getOperand(0)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TstOpc)) + .addReg(OpReg).addImm(1)); + + unsigned CCMode = ARMCC::NE; + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + CCMode = ARMCC::EQ; + } + + unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) + .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); + + FastEmitBranch(FBB, DL); + FuncInfo.MBB->addSuccessor(TBB); + return true; + } } unsigned CmpReg = getRegForValue(BI->getCondition()); if (CmpReg == 0) return false; - // Re-set the flags just in case. - unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(CmpReg).addImm(0)); + // We've been divorced from our compare! Our block was split, and + // now our compare lives in a predecessor block. We musn't + // re-compare here, as the children of the compare aren't guaranteed + // live across the block boundary (we *could* check for this). + // Regardless, the compare has been done in the predecessor block, + // and it left a value for us in a virtual register. Ergo, we test + // the one-bit value left in the virtual register. + unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) + .addReg(CmpReg).addImm(1)); + + unsigned CCMode = ARMCC::NE; + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + CCMode = ARMCC::EQ; + } unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) - .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); FastEmitBranch(FBB, DL); FuncInfo.MBB->addSuccessor(TBB); return true; @@ -1636,17 +1739,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) { unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - // Depend our opcode for thumb on whether or not we're targeting an - // externally callable function. For libcalls we'll just pass a NULL GV - // in here. - bool isExternal = false; - if (!GV || GV->hasExternalLinkage()) isExternal = true; - // Darwin needs the r9 versions of the opcodes. bool isDarwin = Subtarget->isTargetDarwin(); - if (isThumb && isExternal) { - return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi; - } else if (isThumb) { + if (isThumb) { return isDarwin ? ARM::tBLr9 : ARM::tBL; } else { return isDarwin ? ARM::BLr9 : ARM::BL; @@ -1671,9 +1766,6 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { else if (!isTypeLegal(RetTy, RetVT)) return false; - // For now we're using BLX etc on the assumption that we have v5t ops. - if (!Subtarget->hasV5TOps()) return false; - // TODO: For now if we have long calls specified we don't handle the call. if (EnableARMLongCalls) return false; @@ -1711,7 +1803,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // Issue the call, BLr9 for darwin, BL otherwise. // TODO: Turn this into the table of arm call ops. 
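All three branch paths in the SelectBranch hunk above share two ideas: if the "true" block is the layout successor, swap the targets and invert the condition so the common path falls through; and when only a boolean register is available (a split block, or a truncated i1), test its low bit with "tst reg, #1" and branch NE/EQ rather than redoing the compare. A compact model of the target and condition selection, with integers standing in for basic-block pointers:

    #include <utility>

    enum BranchCC { CCNE, CCEQ };

    // Model of the fallthrough handling above: branch to TBB when the
    // tested bit is set (NE); if TBB is the layout successor, swap the
    // targets and branch EQ so the hot path falls through.
    BranchCC chooseBranch(int &TBB, int &FBB, int LayoutSucc) {
      BranchCC CC = CCNE;
      if (TBB == LayoutSucc) {
        std::swap(TBB, FBB);
        CC = CCEQ;
      }
      return CC;
    }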
MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); @@ -1772,13 +1864,9 @@ bool ARMFastISel::SelectCall(const Instruction *I) { else if (!isTypeLegal(RetTy, RetVT)) return false; - // For now we're using BLX etc on the assumption that we have v5t ops. - // TODO: Maybe? - if (!Subtarget->hasV5TOps()) return false; - // TODO: For now if we have long calls specified we don't handle the call. if (EnableARMLongCalls) return false; - + // Set up the argument vectors. SmallVector<Value*, 8> Args; SmallVector<unsigned, 8> ArgRegs; @@ -1827,7 +1915,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // Issue the call, BLr9 for darwin, BL otherwise. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); @@ -1842,7 +1930,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addGlobalAddress(GV, 0, 0)); - + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 68c33f0..e2e95d4 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -106,14 +106,13 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); + ARMCC::AL, 0, TII, MIFlags); else emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); + ARMCC::AL, 0, TII, MIFlags); } void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -141,11 +140,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, + MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); return; } @@ -196,7 +197,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); + .addFrameIndex(FramePtrSpillFI).addImm(0) + .setMIFlag(MachineInstr::FrameSetup); AddDefaultCC(AddDefaultPred(MIB)); } @@ -226,7 +228,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = DPRCSOffset; if (NumBytes) { // Adjust SP after all the callee-save spills. - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. 
sub sp, r7, #24 // Note it's not safe to do this in Thumb2 mode because it would have @@ -282,6 +285,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. + // FIXME: Clarify FrameSetup flags here. if (RegInfo->hasBasePointer(MF)) { if (isARM) BuildMI(MBB, MBBI, dl, @@ -396,8 +400,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) - ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd) - : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND); + ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) + : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), @@ -408,10 +412,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, JumpTarget.getTargetFlags()); } } else if (RetOpcode == ARM::TCRETURNri) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); } else if (RetOpcode == ARM::TCRETURNriND) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). addReg(JumpTarget.getReg(), RegState::Kill); } @@ -439,8 +445,7 @@ ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg, + int FI, unsigned &FrameReg, int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMBaseRegisterInfo *RegInfo = @@ -484,19 +489,23 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, return FPOffset; } else if (MFI->hasVarSizedObjects()) { assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); - // Try to use the frame pointer if we can, else use the base pointer - // since it's available. This is handy for the emergency spill slot, in - // particular. if (AFI->isThumb2Function()) { + // Try to use the frame pointer if we can, else use the base pointer + // since it's available. This is handy for the emergency spill slot, in + // particular. if (FPOffset >= -255 && FPOffset < 0) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; } - } else - FrameReg = RegInfo->getBaseRegister(); + } } else if (AFI->isThumb2Function()) { + // Use add <rd>, sp, #<imm8> + // ldr <rd>, [sp, #<imm8>] + // if at all possible to save space. + if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020) + return Offset; // In Thumb2 mode, the negative offset is very limited. Try to avoid - // out of range references. + // out of range references. 
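A theme running through the frame-lowering hunks here is the MIFlags parameter threaded into emitSPUpdate, emitPushInst, and the register-plus-immediate emitters so that every instruction materialized in the prologue carries MachineInstr::FrameSetup. The flag is just a bit on the instruction; presumably it lets later consumers (debug info, frame analyses) tell prologue bookkeeping apart from user code. A minimal model of the plumbing, not the real MachineInstr API:

    #include <cstdint>

    enum MIFlag : uint8_t { NoFlags = 0, FrameSetup = 1 << 0 };

    struct Instr {
      uint8_t Flags = NoFlags;
      void setMIFlags(uint8_t F) { Flags |= F; }  // mirrors the builder call
    };

    // Helpers take MIFlags defaulted to NoFlags and stamp it on what
    // they emit, so prologue callers can pass FrameSetup without
    // touching any other call site.
    Instr emitSPUpdate(int /*NumBytes*/, uint8_t MIFlags = NoFlags) {
      Instr I;
      I.setMIFlags(MIFlags);
      return I;
    }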
ldr <rt>,[<rn>, #-<imm8>] if (FPOffset >= -255 && FPOffset < 0) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -524,7 +533,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool)) const { + bool(*Func)(unsigned, bool), + unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); @@ -567,14 +577,14 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, if (Regs.size() > 1 || StrOpc== 0) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) - .addReg(ARM::SP)); + .addReg(ARM::SP).setMIFlags(MIFlags)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); } else if (Regs.size() == 1) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) .addReg(Regs[0].first, getKillRegState(Regs[0].second)) - .addReg(ARM::SP); + .addReg(ARM::SP).setMIFlags(MIFlags); // ARM mode needs an extra reg0 here due to addrmode2. Will go away once // that refactoring is complete (eventually). if (StrOpc == ARM::STR_PRE) { @@ -676,9 +686,12 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE; unsigned FltOpc = ARM::VSTMDDB_UPD; - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register); - emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + MachineInstr::FrameSetup); return true; } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 1288b70..61bb8af 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -51,7 +51,8 @@ public: bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; - int ResolveFrameIndexReference(const MachineFunction &MF, int FI, + int ResolveFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; @@ -62,7 +63,8 @@ public: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool)) const; + bool(*Func)(unsigned, bool), + unsigned MIFlags = 0) const; void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index e97ce50..517bba8 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -49,6 +49,8 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { const TargetInstrDesc &LastTID = LastMI->getDesc(); // Skip over one non-VFP / NEON instruction. 
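The ResolveFrameIndexReference logic completed above picks encodings by offset range in Thumb2: a non-negative, word-aligned SP offset up to 1020 fits the short "add rd, sp, #imm" and "ldr rd, [sp, #imm]" forms (an 8-bit immediate scaled by 4), while negative frame-pointer offsets only reach -255. The two range checks as standalone predicates:

    // Thumb2 offset ranges from the hunk above: imm8 scaled by 4 off
    // SP, or a small negative byte offset off the frame pointer.
    bool fitsSPImm8x4(int Offset) {
      return Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020;
    }
    bool fitsNegFPImm8(int FPOffset) {
      return FPOffset >= -255 && FPOffset < 0;
    }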
if (!LastTID.isBarrier() && + // On A9, AGU and NEON/FPU are muxed. + !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) && (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index f0d5a7d..abe5a31 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -45,7 +45,7 @@ DisableShifterOp("disable-shifter-op", cl::Hidden, static cl::opt<bool> CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), - cl::init(false)); + cl::init(true)); //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine @@ -91,9 +91,14 @@ public: bool isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); bool SelectShifterOperandReg(SDValue N, SDValue &A, - SDValue &B, SDValue &C); + SDValue &B, SDValue &C, + bool CheckProfitability = true); bool SelectShiftShifterOperandReg(SDValue N, SDValue &A, - SDValue &B, SDValue &C); + SDValue &B, SDValue &C) { + // Don't apply the profitability check + return SelectShifterOperandReg(N, A, B, C, false); + } + bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); @@ -174,16 +179,6 @@ public: return ARM_AM::getT2SOImmVal(~Imm) != -1; } - inline bool Pred_so_imm(SDNode *inN) const { - ConstantSDNode *N = cast<ConstantSDNode>(inN); - return is_so_imm(N->getZExtValue()); - } - - inline bool Pred_t2_so_imm(SDNode *inN) const { - ConstantSDNode *N = cast<ConstantSDNode>(inN); - return is_t2_so_imm(N->getZExtValue()); - } - // Include the pieces autogenerated from the target description. #include "ARMGenDAGISel.inc" @@ -373,7 +368,8 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, SDValue &BaseReg, SDValue &ShReg, - SDValue &Opc) { + SDValue &Opc, + bool CheckProfitability) { if (DisableShifterOp) return false; @@ -390,7 +386,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, ShImmVal = RHS->getZExtValue() & 31; } else { ShReg = N.getOperand(1); - if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal)) + if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) return false; } Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), @@ -398,30 +394,6 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, return true; } -bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N, - SDValue &BaseReg, - SDValue &ShReg, - SDValue &Opc) { - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); - - // Don't match base register only case. That is matched to a separate - // lower complexity pattern with explicit register operand. - if (ShOpcVal == ARM_AM::no_shift) return false; - - BaseReg = N.getOperand(0); - unsigned ShImmVal = 0; - // Do not check isShifterOpProfitable. This must return true. 
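The SelectShifterOperandReg rework above folds the old SelectShiftShifterOperandReg into one routine with a CheckProfitability switch: shift instructions must accept the operand unconditionally, while ordinary data-processing instructions fold a shift only when isShifterOpProfitable says it pays off. What is being folded is ARM's second-operand shift, which typically costs no extra instruction, e.g.:

    // Semantic model of a folded shifter operand: "add r0, r1, r2,
    // lsl #2" computes r1 + (r2 << 2) in one data-processing
    // instruction, so matching the shift into the add saves an
    // explicit shift.
    unsigned addWithLSL(unsigned Rn, unsigned Rm, unsigned ShAmt) {
      return Rn + (Rm << ShAmt);
    }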
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - ShReg = CurDAG->getRegister(0, MVT::i32); - ShImmVal = RHS->getZExtValue() & 31; - } else { - ShReg = N.getOperand(1); - } - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), - MVT::i32); - return true; -} - bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm) { @@ -437,7 +409,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } - + if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -1138,7 +1110,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } - + if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -1183,7 +1155,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && !CurDAG->isBaseWithConstantOffset(N)) return false; - + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { int RHSC = (int)RHS->getSExtValue(); if (N.getOpcode() == ISD::SUB) @@ -1571,6 +1543,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.data(), Ops.size()); } + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); + if (NumVecs == 1) return VLd; @@ -1600,6 +1577,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); EVT VT = N->getOperand(Vec0Idx).getValueType(); bool is64BitVector = VT.is64BitVector(); @@ -1672,7 +1652,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VSt = + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + + // Transfer memoperands. + cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); + + return VSt; } // Otherwise, quad registers are stored with two separate instructions, @@ -1693,6 +1679,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, MemAddr.getValueType(), MVT::Other, OpsA, 7); + cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); Chain = SDValue(VStA, 1); // Store the odd D registers. 
@@ -1709,8 +1696,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, + Ops.data(), Ops.size()); + cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); + return VStB; } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1726,6 +1715,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); unsigned Lane = cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); @@ -1812,6 +1804,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, QOpcodes[OpcodeIndex]); SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); if (!IsLoad) return VLdLn; @@ -1838,6 +1831,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); @@ -1882,12 +1878,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; std::vector<EVT> ResTys; - ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts)); + ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts)); if (isUpdating) ResTys.push_back(MVT::i32); ResTys.push_back(MVT::Other); SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); SuperReg = SDValue(VLdDup, 0); // Extract the subregisters. @@ -2168,7 +2165,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) // Pattern complexity = 6 cost = 11 size = 0 // - // Also FCPYScc and FCPYDcc. + // Also VMOVScc and VMOVDcc. 
SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; unsigned Opc = 0; @@ -2450,34 +2447,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); - case ARMISD::CNEG: { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue N2 = N->getOperand(2); - SDValue N3 = N->getOperand(3); - SDValue InFlag = N->getOperand(4); - assert(N2.getOpcode() == ISD::Constant); - assert(N3.getOpcode() == ISD::Register); - - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: assert(false && "Illegal conditional move type!"); - break; - case MVT::f32: - Opc = ARM::VNEGScc; - break; - case MVT::f64: - Opc = ARM::VNEGDcc; - break; - } - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); - } - case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -2870,6 +2839,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; } + case ARMISD::VTBL1: { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 6> Ops; + + Ops.push_back(N->getOperand(0)); + Ops.push_back(N->getOperand(1)); + Ops.push_back(getAL(CurDAG)); // Predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register + return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size()); + } + case ARMISD::VTBL2: { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // Form a REG_SEQUENCE to force register allocation. + SDValue V0 = N->getOperand(0); + SDValue V1 = N->getOperand(1); + SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + + SmallVector<SDValue, 6> Ops; + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(2)); + Ops.push_back(getAL(CurDAG)); // Predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register + return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT, + Ops.data(), Ops.size()); + } + case ISD::CONCAT_VECTORS: return SelectConcatVector(N); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ab9f9e1..0a31b87 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -72,6 +72,11 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); +// The APCS parameter registers. +static const unsigned GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 +}; + void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { @@ -393,6 +398,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); } + // Use divmod iOS compiler-rt calls. 
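+ // (compiler-rt's divmod helpers return the quotient and store the
+ // remainder through a pointer -- in C terms, roughly
+ // int __divmodsi4(int a, int b, int *rem) -- so a neighboring
+ // sdiv/srem pair can be served by one call.)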
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); + setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else @@ -461,6 +472,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::UDIV, MVT::v8i8, Custom); setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); + // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with + // a destination type that is wider than the source. + setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -502,18 +517,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // i64 operation support. + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); if (Subtarget->isThumb1Only()) { - setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - } else { - setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - if (!Subtarget->hasV6Ops()) - setOperationAction(ISD::MULHS, MVT::i32, Expand); } + if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()) + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); @@ -597,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); // Since the libcalls include locking, fold in the fences setShouldFoldAtomicFences(true); } @@ -716,7 +740,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // pressure of the register class's representative and all of its super // classes' representatives transitively. We have not implemented this because // of the difficulty prior to coalescing of modeling operand register classes -// due to the common occurence of cross class copies and subregister insertions +// due to the common occurrence of cross class copies and subregister insertions // and extractions.
std::pair<const TargetRegisterClass*, uint8_t> ARMTargetLowering::findRepresentativeClass(EVT VT) const{ @@ -778,7 +802,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::CNEG: return "ARMISD::CNEG"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -853,6 +876,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::VTBL1: return "ARMISD::VTBL1"; + case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; @@ -861,6 +886,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; + case ARMISD::VBSL: return "ARMISD::VBSL"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; @@ -946,27 +972,6 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; } -// FIXME: Move to RegInfo -unsigned -ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (RC->getID()) { - default: - return 0; - case ARM::tGPRRegClassID: - return TFI->hasFP(MF) ? 4 : 5; - case ARM::GPRRegClassID: { - unsigned FP = TFI->hasFP(MF) ? 1 : 0; - return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); - } - case ARM::SPRRegClassID: // Currently not used as 'rep' register class. - case ARM::DPRRegClassID: - return 32 - 10; - } -} - //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -1130,22 +1135,6 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, return Chain; } -/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified -/// by "Src" to address "Dst" of size "Size". Alignment information is -/// specified by the specific parameter attribute. The copy will be passed as -/// a byval function parameter. -/// Sometimes what we are copying is the end of a larger object, the part that -/// does not fit in registers. -static SDValue -CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, - ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - DebugLoc dl) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); - return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); -} - /// LowerMemOpCallTo - Store the argument to the stack. 
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, @@ -1156,9 +1145,6 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - if (Flags.isByVal()) - return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); - return DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(LocMemOffset), false, false, 0); @@ -1224,6 +1210,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.setCallOrPrologue(Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -1253,6 +1240,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; + bool isByVal = Flags.isByVal(); // Promote the value if needed. switch (VA.getLocInfo()) { @@ -1299,6 +1287,43 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else if (isByVal) { + assert(VA.isMemLoc()); + unsigned offset = 0; + + // True if this byval aggregate will be split between registers + // and memory. + if (CCInfo.isFirstByValRegValid()) { + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned int i, j; + for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { + SDValue Const = DAG.getConstant(4*i, MVT::i32); + SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, + MachinePointerInfo(), + false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(j, Load)); + } + offset = ARM::R4 - CCInfo.getFirstByValReg(); + CCInfo.clearFirstByValReg(); + } + + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); + SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + StkPtrOff); + SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); + SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, + MVT::i32); + MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, + Flags.getByValAlign(), + /*isVolatile=*/false, + /*AlwaysInline=*/false, + MachinePointerInfo(0), + MachinePointerInfo(0))); + } else if (!IsSibCall) { assert(VA.isMemLoc()); @@ -1332,7 +1357,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // than necessary, because it means that each store effectively depends // on every argument instead of just those arguments it would clobber. - // Do not flag preceeding copytoreg stuff together with the following stuff. + // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, @@ -1492,6 +1517,35 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, dl, DAG, InVals); } +/// HandleByVal - Every parameter *after* a byval parameter is passed +/// on the stack. Remember the next parameter register to allocate, +/// and then confiscate the rest of the parameter registers to ensure +/// this.
+void +llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { + unsigned reg = State->AllocateReg(GPRArgRegs, 4); + assert((State->getCallOrPrologue() == Prologue || + State->getCallOrPrologue() == Call) && + "unhandled ParmContext"); + if ((!State->isFirstByValRegValid()) && + (ARM::R0 <= reg) && (reg <= ARM::R3)) { + State->setFirstByValReg(reg); + // At a call site, a byval parameter that is split between + // registers and memory needs its size truncated here. In a + // function prologue, such byval parameters are reassembled in + // memory, and are not truncated. + if (State->getCallOrPrologue() == Call) { + unsigned excess = 4 * (ARM::R4 - reg); + assert(size >= excess && "expected larger existing stack allocation"); + size -= excess; + } + } + // Confiscate any remaining parameter registers to preclude their + // assignment to subsequent parameters. + while (State->AllocateReg(GPRArgRegs, 4)) + ; +} + /// MatchingStackOffset - Return true if the given stack call argument is /// already available in the same position (relatively) of the caller's /// incoming argument stack. @@ -1813,6 +1867,16 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { return HasRet; } +bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + if (!EnableARMTailCalls) + return false; + + if (!CI->isTailCall()) + return false; + + return !Subtarget->isThumb1Only(); +} + // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise @@ -2096,7 +2160,7 @@ ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - Op.getOperand(0), Op.getOperand(1)); + Op.getOperand(0)); } SDValue @@ -2151,6 +2215,13 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } + case Intrinsic::arm_neon_vmulls: + case Intrinsic::arm_neon_vmullu: { + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) + ? ARMISD::VMULLs : ARMISD::VMULLu; + return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } } } @@ -2257,6 +2328,88 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } +void +ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, + unsigned &VARegSize, unsigned &VARegSaveSize) + const { + unsigned NumGPRs; + if (CCInfo.isFirstByValRegValid()) + NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); + else { + unsigned int firstUnalloced; + firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, + sizeof(GPRArgRegs) / + sizeof(GPRArgRegs[0])); + NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; + } + + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + VARegSize = NumGPRs * 4; + VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); +} + +// The remaining GPRs hold either the beginning of variable-argument +// data, or the beginning of an aggregate passed by value (usually +// byval). Either way, we allocate stack slots adjacent to the data +// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with +// these values; otherwise, this reassembles a (byval) structure that +// was split between registers and memory. +void +ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + unsigned ArgOffset) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned firstRegToSaveIndex; + if (CCInfo.isFirstByValRegValid()) + firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; + else { + firstRegToSaveIndex = CCInfo.getFirstUnallocated + (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + } + + unsigned VARegSize, VARegSaveSize; + computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); + if (VARegSaveSize) { + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by dereferencing + // the result of va_next. + AFI->setVarArgsRegSaveSize(VARegSaveSize); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, + ArgOffset + VARegSaveSize + - VARegSize, + false)); + SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), + getPointerTy()); + + SmallVector<SDValue, 4> MemOps; + for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { + TargetRegisterClass *RC; + if (AFI->isThumb1OnlyFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); + } + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); + } else + // This will point to the next argument passed via stack. + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); +} + SDValue ARMTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -2265,7 +2418,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -2275,12 +2427,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.setCallOrPrologue(Prologue); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); SmallVector<SDValue, 16> ArgValues; + int lastInsIndex = -1; + SDValue ArgValue; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -2288,7 +2443,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); - SDValue ArgValue; if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots.
@@ -2364,67 +2518,45 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); - unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); + int index = ArgLocs[i].getValNo(); + + // Some Ins[] entries become multiple ArgLoc[] entries. + // Process them only once. + if (index != lastInsIndex) + { + ISD::ArgFlagsTy Flags = Ins[index].Flags; + // FIXME: For now, all byval parameter objects are marked mutable. + // This can be changed with more analysis. + // In case of tail call optimization mark all arguments mutable. + // Since they could be overwritten by lowering of arguments in case of + // a tail call. + if (Flags.isByVal()) { + unsigned VARegSize, VARegSaveSize; + computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); + unsigned Bytes = Flags.getByValSize() - VARegSize; + if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. + int FI = MFI->CreateFixedObject(Bytes, + VA.getLocMemOffset(), false); + InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + } else { + int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); - // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } + lastInsIndex = index; + } } } // varargs - if (isVarArg) { - static const unsigned GPRArgRegs[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; - - unsigned NumGPRs = CCInfo.getFirstUnallocated - (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); - - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); - unsigned VARegSize = (4 - NumGPRs) * 4; - unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); - unsigned ArgOffset = CCInfo.getNextStackOffset(); - if (VARegSaveSize) { - // If this function is vararg, store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing - // the result of va_next. - AFI->setVarArgsRegSaveSize(VARegSaveSize); - AFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(VARegSaveSize, - ArgOffset + VARegSaveSize - VARegSize, - false)); - SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), - getPointerTy()); - - SmallVector<SDValue, 4> MemOps; - for (; NumGPRs < 4; ++NumGPRs) { - TargetRegisterClass *RC; - if (AFI->isThumb1OnlyFunction()) - RC = ARM::tGPRRegisterClass; - else - RC = ARM::GPRRegisterClass; - - unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); - SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, getPointerTy())); - } - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); - } else - // This will point to the next argument passed via stack. 
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); - } + if (isVarArg) + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); return Chain; } @@ -2517,6 +2649,27 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } +/// duplicateCmp - Glue values can have only one use, so this function +/// duplicates a comparison node. +SDValue +ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { + unsigned Opc = Cmp.getOpcode(); + DebugLoc DL = Cmp.getDebugLoc(); + if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) + return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); + + assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); + Cmp = Cmp.getOperand(0); + Opc = Cmp.getOpcode(); + if (Opc == ARMISD::CMPFP) + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); + else { + assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); + } + return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -2552,7 +2705,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Cond.getValueType(); SDValue ARMcc = Cond.getOperand(2); SDValue CCR = Cond.getOperand(3); - SDValue Cmp = Cond.getOperand(4); + SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); } } @@ -2681,8 +2834,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { // If one of the operand is zero, it's safe to ignore the NaN case since // we only care about equality comparisons. (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { - // If unsafe fp math optimization is enabled and there are no othter uses of - // the CMP operands, and the condition code is EQ oe NE, we can optimize it + // If unsafe fp math optimization is enabled and there are no other uses of + // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
if (CC == ISD::SETOEQ) CC = ISD::SETEQ; @@ -2811,8 +2964,39 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); } +static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + + EVT OperandVT = Op.getOperand(0).getValueType(); + assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!"); + if (VT != MVT::v4f32) + return DAG.UnrollVectorOp(Op.getNode()); + + unsigned CastOpc; + unsigned Opc; + switch (Op.getOpcode()) { + default: + assert(0 && "Invalid opcode!"); + case ISD::SINT_TO_FP: + CastOpc = ISD::SIGN_EXTEND; + Opc = ISD::SINT_TO_FP; + break; + case ISD::UINT_TO_FP: + CastOpc = ISD::ZERO_EXTEND; + Opc = ISD::UINT_TO_FP; + break; + } + + Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); +} + static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); + if (VT.isVector()) + return LowerVectorINT_TO_FP(Op, DAG); + DebugLoc dl = Op.getDebugLoc(); unsigned Opc; @@ -2860,7 +3044,10 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), DAG.getConstant(32, MVT::i32)); - } + } else if (VT == MVT::f32) + Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), + DAG.getConstant(32, MVT::i32)); Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); @@ -2869,11 +3056,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); - + SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); - if (SrcVT == MVT::f32) { + if (VT == MVT::f32) { Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, DAG.getConstant(0, MVT::i32)); @@ -3508,6 +3695,13 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, return true; } +static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { + // We can handle <8 x i8> vector shuffles. If the index in the mask is out of + // range, then 0 is placed into the resulting vector. So pretty much any mask + // of 8 elements can work here. + return VT == MVT::v8i8 && M.size() == 8; +} + static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); @@ -3947,6 +4141,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || isVEXTMask(M, VT, ReverseVEXT, Imm) || + isVTBLMask(M, VT) || isVTRNMask(M, VT, WhichResult) || isVUZPMask(M, VT, WhichResult) || isVZIPMask(M, VT, WhichResult) || @@ -4024,6 +4219,29 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, } } +static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, + SmallVectorImpl<int> &ShuffleMask, + SelectionDAG &DAG) { + // Check to see if we can use the VTBL instruction. 
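+ // (VTBL reads one byte per output lane from a table of D registers,
+ // indexed by the corresponding mask byte; an out-of-range index yields
+ // 0 rather than an undefined value, which is why any 8-entry mask is
+ // acceptable here.)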
+ SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + DebugLoc DL = Op.getDebugLoc(); + + SmallVector<SDValue, 8> VTBLMask; + for (SmallVectorImpl<int>::iterator + I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) + VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); + + if (V2.getNode()->getOpcode() == ISD::UNDEF) + return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, + &VTBLMask[0], 8)); + + return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, + &VTBLMask[0], 8)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4141,6 +4359,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + if (VT == MVT::v8i8) { + SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); + if (NewOp.getNode()) + return NewOp; + } + return SDValue(); } @@ -4290,6 +4514,28 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); } +static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { + unsigned Opcode = N->getOpcode(); + if (Opcode == ISD::ADD || Opcode == ISD::SUB) { + SDNode *N0 = N->getOperand(0).getNode(); + SDNode *N1 = N->getOperand(1).getNode(); + return N0->hasOneUse() && N1->hasOneUse() && + isSignExtended(N0, DAG) && isSignExtended(N1, DAG); + } + return false; +} + +static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { + unsigned Opcode = N->getOpcode(); + if (Opcode == ISD::ADD || Opcode == ISD::SUB) { + SDNode *N0 = N->getOperand(0).getNode(); + SDNode *N1 = N->getOperand(1).getNode(); + return N0->hasOneUse() && N1->hasOneUse() && + isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); + } + return false; +} + static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. @@ -4298,29 +4544,73 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; - if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG)) + bool isMLA = false; + bool isN0SExt = isSignExtended(N0, DAG); + bool isN1SExt = isSignExtended(N1, DAG); + if (isN0SExt && isN1SExt) NewOpc = ARMISD::VMULLs; - else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG)) - NewOpc = ARMISD::VMULLu; - else if (VT == MVT::v2i64) - // Fall through to expand this. It is not legal. - return SDValue(); - else - // Other vector multiplications are legal. - return Op; + else { + bool isN0ZExt = isZeroExtended(N0, DAG); + bool isN1ZExt = isZeroExtended(N1, DAG); + if (isN0ZExt && isN1ZExt) + NewOpc = ARMISD::VMULLu; + else if (isN1SExt || isN1ZExt) { + // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these + // into (s/zext A * s/zext C) + (s/zext B * s/zext C) + if (isN1SExt && isAddSubSExt(N0, DAG)) { + NewOpc = ARMISD::VMULLs; + isMLA = true; + } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { + NewOpc = ARMISD::VMULLu; + isMLA = true; + } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { + std::swap(N0, N1); + NewOpc = ARMISD::VMULLu; + isMLA = true; + } + } + + if (!NewOpc) { + if (VT == MVT::v2i64) + // Fall through to expand this. It is not legal. + return SDValue(); + else + // Other vector multiplications are legal. 
+ return Op; + } + } // Legalize to a VMULL instruction. DebugLoc DL = Op.getDebugLoc(); - SDValue Op0 = SkipExtension(N0, DAG); + SDValue Op0; SDValue Op1 = SkipExtension(N1, DAG); - - assert(Op0.getValueType().is64BitVector() && - Op1.getValueType().is64BitVector() && - "unexpected types for extended operands to VMULL"); - return DAG.getNode(NewOpc, DL, VT, Op0, Op1); + if (!isMLA) { + Op0 = SkipExtension(N0, DAG); + assert(Op0.getValueType().is64BitVector() && + Op1.getValueType().is64BitVector() && + "unexpected types for extended operands to VMULL"); + return DAG.getNode(NewOpc, DL, VT, Op0, Op1); + } + + // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during + // isel lowering to take advantage of no-stall back to back vmul + vmla. + // vmull q0, d4, d6 + // vmlal q0, d5, d6 + // is faster than + // vaddl q0, d4, d5 + // vmovl q1, d6 + // vmul q0, q0, q1 + SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); + SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); + EVT Op1VT = Op1.getValueType(); + return DAG.getNode(N0->getOpcode(), DL, VT, + DAG.getNode(NewOpc, DL, VT, + DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), + DAG.getNode(NewOpc, DL, VT, + DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } -static SDValue +static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); @@ -4331,7 +4621,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); // Get reciprocal estimate. // float4 recip = vrecpeq_f32(yf); - Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); // Because char has a smaller range than uchar, we can actually get away // without any newton steps. This requires that we use a weird bias @@ -4349,7 +4639,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { return X; } -static SDValue +static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { SDValue N2; // Convert to float. @@ -4359,13 +4649,13 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); - + // Use reciprocal estimate and one refinement step. 
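// (VRECPS computes 2.0 - a*b, so multiplying the VRECPE estimate by
// vrecps(y, estimate) performs one Newton-Raphson step x' = x*(2 - y*x),
// roughly doubling the estimate's precision.)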
// float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); - N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); - N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), N1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); @@ -4395,15 +4685,15 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; - + if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); - + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, @@ -4414,7 +4704,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); - + N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); return N0; } @@ -4430,32 +4720,32 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; - + if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); - + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0)); - + N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 - + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); - - N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, + + N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), N0); return N0; } - + // v4i16 sdiv ... Convert to float. 
// float4 yf = vcvt_f32_s32(vmovl_u16(y)); // float4 xf = vcvt_f32_s32(vmovl_u16(x)); @@ -4468,13 +4758,13 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { // float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); // recip *= vrecpsq_f32(yf, recip); - N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); - N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), N1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); - N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), N1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); @@ -4503,7 +4793,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalAddress: return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); @@ -4524,7 +4814,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); - case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); + case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); @@ -4754,6 +5044,109 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + bool signExtend, + ARMCC::CondCodes Cond) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + unsigned oldval = dest; + DebugLoc dl = MI->getDebugLoc(); + + bool isThumb2 = Subtarget->isThumb2(); + unsigned ldrOpc, strOpc, extendOpc; + switch (Size) { + default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + case 1: + ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; + strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; + extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr; + break; + case 2: + ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; + strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; + extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr; + break; + case 4: + ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; + strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; + extendOpc = 0; + break; + } + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. 
+ exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldrex dest, ptr + // (sign extend dest, if required) + // cmp dest, incr + // cmov.cond scratch2, dest, incr + // strex scratch, scratch2, ptr + // cmp scratch, #0 + // bne- loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + + // Sign extend the value, if necessary. + if (signExtend && extendOpc) { + oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest)); + } + + // Build compare and cmov instructions. + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(oldval).addReg(incr)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) + .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) + .addReg(ptr)); + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(scratch).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + static MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), @@ -4763,6 +5156,72 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { llvm_unreachable("Expecting a BB with two successors!"); } +// FIXME: This opcode table should obviously be expressed in the target +// description. We probably just need a "machine opcode" value in the pseudo +// instruction. But the ideal solution may be to simply remove the "S" version +// of the opcode altogether. +struct AddSubFlagsOpcodePair { + unsigned PseudoOpc; + unsigned MachineOpc; +}; + +static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { + {ARM::ADCSri, ARM::ADCri}, + {ARM::ADCSrr, ARM::ADCrr}, + {ARM::ADCSrs, ARM::ADCrs}, + {ARM::SBCSri, ARM::SBCri}, + {ARM::SBCSrr, ARM::SBCrr}, + {ARM::SBCSrs, ARM::SBCrs}, + {ARM::RSBSri, ARM::RSBri}, + {ARM::RSBSrr, ARM::RSBrr}, + {ARM::RSBSrs, ARM::RSBrs}, + {ARM::RSCSri, ARM::RSCri}, + {ARM::RSCSrs, ARM::RSCrs}, + {ARM::t2ADCSri, ARM::t2ADCri}, + {ARM::t2ADCSrr, ARM::t2ADCrr}, + {ARM::t2ADCSrs, ARM::t2ADCrs}, + {ARM::t2SBCSri, ARM::t2SBCri}, + {ARM::t2SBCSrr, ARM::t2SBCrr}, + {ARM::t2SBCSrs, ARM::t2SBCrs}, + {ARM::t2RSBSri, ARM::t2RSBri}, + {ARM::t2RSBSrs, ARM::t2RSBrs}, +}; + +// Convert an Add or Subtract with Carry and Flags to a generic opcode with +// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>). +// +// FIXME: Somewhere we should assert that CPSR<def> is in the correct +// position to be recognized by the target description as the 'S' bit.
+bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI, + MachineBasicBlock *BB) const { + unsigned OldOpc = MI->getOpcode(); + unsigned NewOpc = 0; + + // This is only called for instructions that need remapping, so iterating over + // the tiny opcode table is not costly. + static const int NPairs = + sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); + for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0], + *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) { + if (OldOpc == Pair->PseudoOpc) { + NewOpc = Pair->MachineOpc; + break; + } + } + if (!NewOpc) + return false; + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); + for (unsigned i = 0; i < MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + AddDefaultPred(MIB); + MIB.addReg(ARM::CPSR, RegState::Define); // S bit + MI->eraseFromParent(); + return true; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -4770,10 +5229,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { - default: + default: { + if (RemapAddSubWithFlags(MI, BB)) + return BB; + MI->dump(); llvm_unreachable("Unexpected instr type to insert"); - + } case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); case ARM::ATOMIC_LOAD_ADD_I16: @@ -4816,6 +5278,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_LOAD_SUB_I32: return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_MIN_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); + case ARM::ATOMIC_LOAD_MIN_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); + case ARM::ATOMIC_LOAD_MIN_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); + + case ARM::ATOMIC_LOAD_MAX_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); + case ARM::ATOMIC_LOAD_MAX_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); + case ARM::ATOMIC_LOAD_MAX_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); + + case ARM::ATOMIC_LOAD_UMIN_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); + case ARM::ATOMIC_LOAD_UMIN_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); + case ARM::ATOMIC_LOAD_UMIN_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); + + case ARM::ATOMIC_LOAD_UMAX_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); + case ARM::ATOMIC_LOAD_UMAX_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); + case ARM::ATOMIC_LOAD_UMAX_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); + case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); @@ -5034,6 +5524,42 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } +/// PerformVMULCombine +/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the +/// special multiplier accumulator forwarding. 
+/// vmul d3, d0, d2 +/// vmla d3, d1, d2 +/// is faster than +/// vadd d3, d0, d1 +/// vmul d3, d3, d2 +static SDValue PerformVMULCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasVMLxForwarding()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned Opcode = N0.getOpcode(); + if (Opcode != ISD::ADD && Opcode != ISD::SUB && + Opcode != ISD::FADD && Opcode != ISD::FSUB) { + Opcode = N1.getOpcode(); + if (Opcode != ISD::ADD && Opcode != ISD::SUB && + Opcode != ISD::FADD && Opcode != ISD::FSUB) + return SDValue(); + std::swap(N0, N1); + } + + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + return DAG.getNode(Opcode, DL, VT, + DAG.getNode(ISD::MUL, DL, VT, N00, N1), + DAG.getNode(ISD::MUL, DL, VT, N01, N1)); +} + static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -5046,6 +5572,8 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); EVT VT = N->getValueType(0); + if (VT.is64BitVector() || VT.is128BitVector()) + return PerformVMULCombine(N, DCI, Subtarget); if (VT != MVT::i32) return SDValue(); @@ -5088,12 +5616,16 @@ static SDValue PerformMULCombine(SDNode *N, static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; @@ -5127,6 +5659,9 @@ static SDValue PerformORCombine(SDNode *N, EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; @@ -5147,6 +5682,37 @@ static SDValue PerformORCombine(SDNode *N, } } + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::AND) + return SDValue(); + SDValue N1 = N->getOperand(1); + + // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. + if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && + DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); + APInt SplatBits0; + if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); + APInt SplatBits1; + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } + } + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable.
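Both new OR-combine forms above reduce to plain bitwise identities, which a standalone scalar model makes easy to check (illustrative names only, not LLVM APIs):

#include <cassert>
#include <cstdint>

// VBSL: per-bit select -- take bits of b where the mask a is 1, bits of c
// where it is 0. This is exactly the (or (and B, A), (and C, ~A)) form
// matched above.
static uint32_t vbsl(uint32_t a, uint32_t b, uint32_t c) {
  return (b & a) | (c & ~a);
}

// Generic bitfield insert: copy the bits of val selected by a contiguous
// mask into base, leaving the other bits of base untouched.
static uint32_t bfi(uint32_t base, uint32_t val, uint32_t mask) {
  return (base & ~mask) | (val & mask);
}

int main() {
  assert(vbsl(0x0000ffffu, 0x12345678u, 0x9abcdef0u) == 0x9abc5678u);
  assert(bfi(0xffffffffu, 0x0000ab00u, 0x0000ff00u) == 0xffffabffu);
  return 0;
}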
@@ -5154,19 +5720,16 @@ static SDValue PerformORCombine(SDNode *N, if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) - // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // && mask == ~mask2 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) - // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // && ~mask == mask2 // (i.e., copy a bitfield value into another bitfield of the same width) - if (N0.getOpcode() != ISD::AND) - return SDValue(); if (VT != MVT::i32) return SDValue(); @@ -5209,26 +5772,26 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); unsigned Mask2 = N11C->getZExtValue(); + // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern + // as is to match. if (ARM::isBitFieldInvertedMask(Mask) && - ARM::isBitFieldInvertedMask(~Mask2) && - (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { + (Mask == ~Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasT2ExtractPack() && (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a - unsigned lsb = CountTrailingZeros_32(Mask2); + unsigned amt = CountTrailingZeros_32(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), - DAG.getConstant(lsb, MVT::i32)); + DAG.getConstant(amt, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } else if (ARM::isBitFieldInvertedMask(~Mask) && - ARM::isBitFieldInvertedMask(Mask2) && - (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { + (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasT2ExtractPack() && @@ -5239,7 +5802,7 @@ static SDValue PerformORCombine(SDNode *N, Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, - DAG.getConstant(Mask2, MVT::i32)); + DAG.getConstant(Mask2, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); @@ -5294,6 +5857,37 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SDValue InDouble = N->getOperand(0); if (InDouble.getOpcode() == ARMISD::VMOVDRR) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); + + // vmovrrd(load f64) -> (load i32), (load i32) + SDNode *InNode = InDouble.getNode(); + if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && + InNode->getValueType(0) == MVT::f64 && + InNode->getOperand(1).getOpcode() == ISD::FrameIndex && + !cast<LoadSDNode>(InNode)->isVolatile()) { + // TODO: Should this be done for non-FrameIndex operands? 
+ LoadSDNode *LD = cast<LoadSDNode>(InNode); + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = LD->getDebugLoc(); + SDValue BasePtr = LD->getBasePtr(); + SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, + LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + + SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, + DAG.getConstant(4, MVT::i32)); + SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, + LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), + std::min(4U, LD->getAlignment() / 2)); + + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); + DCI.RemoveFromWorklist(LD); + DAG.DeleteNode(LD); + return Result; + } + return SDValue(); } @@ -5323,8 +5917,28 @@ static SDValue PerformSTORECombine(SDNode *N, // Otherwise, the i64 value will be legalized to a pair of i32 values. StoreSDNode *St = cast<StoreSDNode>(N); SDValue StVal = St->getValue(); - if (!ISD::isNormalStore(St) || St->isVolatile() || - StVal.getValueType() != MVT::i64 || + if (!ISD::isNormalStore(St) || St->isVolatile()) + return SDValue(); + + if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && + StVal.getNode()->hasOneUse() && !St->isVolatile()) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = St->getDebugLoc(); + SDValue BasePtr = St->getBasePtr(); + SDValue NewST1 = DAG.getStore(St->getChain(), DL, + StVal.getNode()->getOperand(0), BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + + SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, + DAG.getConstant(4, MVT::i32)); + return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + OffsetPtr, St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), + std::min(4U, St->getAlignment() / 2)); + } + + if (StVal.getValueType() != MVT::i64 || StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); @@ -5553,7 +6167,7 @@ static SDValue CombineBaseUpdate(SDNode *N, EVT VecTy; if (isLoad) VecTy = N->getValueType(0); - else + else VecTy = N->getOperand(AddrOpIdx+1).getValueType(); unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; if (isLaneOp) @@ -5603,7 +6217,7 @@ static SDValue CombineBaseUpdate(SDNode *N, DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); break; - } + } return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index dc400c4..a2e6260 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -57,7 +57,6 @@ namespace llvm { CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. - CNEG, // ARM conditional negate instructions. BCC_i64, @@ -89,7 +88,7 @@ namespace llvm { MEMBARRIER_MCR, // Memory barrier (MCR) PRELOAD, // Preload - + VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. VCGE, // Vector compare greater than or equal. 
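The PerformVMOVRRDCombine and PerformSTORECombine changes above both exploit the fact that an f64 in memory is simply two adjacent 32-bit words, so a double can be moved without touching a VFP D register. A standalone illustration (which word is low or high depends on endianness):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Read an f64 back as the two i32 words the split load would produce.
  double d = 1.5;
  uint32_t w0, w1;
  std::memcpy(&w0, &d, 4);                               // first word
  std::memcpy(&w1, reinterpret_cast<char *>(&d) + 4, 4); // second word
  std::printf("%08x %08x\n", w0, w1);
  return 0;
}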
@@ -154,6 +153,8 @@ namespace llvm { VZIP, // zip (interleave) VUZP, // unzip (deinterleave) VTRN, // transpose + VTBL1, // 1-register shuffle with mask + VTBL2, // 2-register shuffle with mask // Vector multiply long: VMULLs, // ...signed @@ -172,12 +173,15 @@ namespace llvm { // Bit-field insert BFI, - + // Vector OR with immediate VORRIMM, // Vector AND with NOT of immediate VBICIMM, + // Vector bitwise select + VBSL, + // Vector load N-element structure to all lanes: VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, VLD3DUP, @@ -330,9 +334,6 @@ namespace llvm { Sched::Preference getSchedulingPreference(SDNode *N) const; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; - bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -407,7 +408,7 @@ namespace llvm { SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; @@ -425,6 +426,13 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, unsigned ArgOffset) + const; + + void computeRegArea(CCState &CCInfo, MachineFunction &MF, + unsigned &VARegSize, unsigned &VARegSaveSize) const; + virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, @@ -435,6 +443,9 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + /// HandleByVal - Target-specific cleanup for ByVal support. + virtual void HandleByVal(CCState *, unsigned &) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. 
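Looking back at the PerformVMOVRRDCombine and PerformSTORECombine hunks a little earlier in this diff: both now split one 64-bit memory access into two i32 accesses at Base and Base+4, giving the second access the conservative alignment min(4, align/2) that the code computes, and thereby avoiding materializing the value in a D register only to move it straight back out. A host-side sketch of the store half, assuming a little-endian layout as on the ARM targets involved; the function name is illustrative only:

#include <algorithm>
#include <cstdint>
#include <cstring>

static void storeI64AsTwoI32(uint8_t *Base, uint64_t V, unsigned Align) {
  uint32_t Lo = uint32_t(V);          // first i32 store, original alignment
  uint32_t Hi = uint32_t(V >> 32);    // second i32 store at Base + 4
  std::memcpy(Base, &Lo, 4);
  unsigned Align2 = std::min(4u, Align / 2);  // as computed in the combine
  (void)Align2;  // the DAG attaches this to the second store; a plain
                 // memcpy has no way to express it
  std::memcpy(Base + 4, &Hi, 4);
}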
@@ -456,10 +467,13 @@ namespace llvm { virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; + SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, DebugLoc dl) const; + SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; @@ -470,16 +484,22 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode) const; + MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + bool signExtend, + ARMCC::CondCodes Cond) const; + bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; }; - + enum NEONModImmType { VMOVModImm, VMVNModImm, OtherModImm }; - - + + namespace ARM { FastISel *createFastISel(FunctionLoweringInfo &funcInfo); } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 359ac45..f5fb98e 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -206,19 +206,30 @@ def setend_op : Operand<i32> { let PrintMethod = "printSetendOperand"; } -def cps_opt : Operand<i32> { - let PrintMethod = "printCPSOptionOperand"; -} - def msr_mask : Operand<i32> { let PrintMethod = "printMSRMaskOperand"; let ParserMatchClass = MSRMaskOperand; } -// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. -// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. -def neg_zero : Operand<i32> { - let PrintMethod = "printNegZeroOperand"; +// Shift Right Immediate - A shift right immediate is encoded differently from +// other shift immediates. The imm6 field is encoded like so: +// +// Offset Encoding +// 8 imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0> +// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0> +// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> +// 64 64 - <imm> is encoded in imm6<5:0> +def shr_imm8 : Operand<i32> { + let EncoderMethod = "getShiftRight8Imm"; +} +def shr_imm16 : Operand<i32> { + let EncoderMethod = "getShiftRight16Imm"; +} +def shr_imm32 : Operand<i32> { + let EncoderMethod = "getShiftRight32Imm"; +} +def shr_imm64 : Operand<i32> { + let EncoderMethod = "getShiftRight64Imm"; } //===----------------------------------------------------------------------===// @@ -279,6 +290,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern> let OutOperandList = oops; let InOperandList = iops; let Pattern = pattern; + let isCodeGenOnly = 1; } // PseudoInst that's ARM-mode only. @@ -422,11 +434,11 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, opc, asm, "", pattern> { bits<4> Rd; bits<4> Rt; - bits<4> Rn; + bits<4> addr; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 0; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rd; let Inst{11-4} = 0b11111001; let Inst{3-0} = Rt; @@ -513,6 +525,24 @@ class AI2stridx<bit isByte, bit isPre, dag oops, dag iops, let Inst{19-16} = Rn; let Inst{11-0} = offset{11-0}; } +// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB +// but for now use this class for STRT and STRBT. 
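The shr_imm encoding table earlier in this hunk reduces to one formula: for an element width W in {8, 16, 32, 64} and a shift amount of 1..W, the six encoded bits are 2*W - imm, because the leading marker bits spell out W itself and the low field carries W - imm. A sketch of that arithmetic; encodeShrImm is an illustrative name, and the real split of work between the getShiftRightNImm encoder methods and the instruction's fixed bits may differ:

#include <cassert>
#include <cstdint>

static uint8_t encodeShrImm(unsigned W, unsigned Imm) {
  assert((W == 8 || W == 16 || W == 32 || W == 64) && Imm >= 1 && Imm <= W);
  return uint8_t((2 * W - Imm) & 0x3F);        // the imm6 field
}

int main() {
  assert(encodeShrImm(8, 8)   == 0x08);  // imm6<5:3> = 001, 8-8   in <2:0>
  assert(encodeShrImm(16, 1)  == 0x1F);  // imm6<5:4> = 01,  16-1  in <3:0>
  assert(encodeShrImm(32, 32) == 0x20);  // imm6<5>   = 1,   32-32 in <4:0>
  assert(encodeShrImm(64, 1)  == 0x3F);  //                  64-1  in <5:0>
}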
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list<dag> pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM2 store w/ two operands: (GPR, am2offset) + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; +} // addrmode3 instructions class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f, @@ -547,6 +577,34 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, let Inst{15-12} = Rt; // Rt let Inst{7-4} = op; } + +// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB +// but for now use this class for LDRSBT, LDRHT, LDSHT. +class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin, + opc, asm, cstr, pattern> { + // {13} 1 == imm8, 0 == Rm + // {12-9} Rn + // {8} isAdd + // {7-4} imm7_4/zero + // {3-0} imm3_0/Rm + bits<14> addr; + bits<4> Rt; + let Inst{27-25} = 0b000; + let Inst{24} = isPre; // P bit + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{20} = op20; // L bit + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Rt; // Rt + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{7-4} = op; + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3"; +} + class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -619,12 +677,25 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, opc, asm, cstr,pattern> { + // {13} 1 == imm8, 0 == Rm + // {12-9} Rn + // {8} isAdd + // {7-4} imm7_4/zero + // {3-0} imm3_0/Rm + bits<14> addr; + bits<4> Rt; + let Inst{3-0} = addr{3-0}; // imm3_0/Rm let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 0; // S bit let Inst{7} = 1; + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{15-12} = Rt; // Rt + let Inst{19-16} = addr{12-9}; // Rn let Inst{20} = 0; // L bit let Inst{21} = 0; // W bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{23} = addr{8}; // U bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -1670,7 +1741,8 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, } // NEON 3 vector register format. 
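Both AI3ldstidxT and AI3sthpo above decode the same 14-bit addrmode3 machine operand, so their let-bindings become mechanical once the packing is fixed. A sketch of that packing under the layout quoted in the comments; packAddrMode3 is an illustrative name, not an LLVM helper:

#include <cstdint>

// {13} 1 == imm8, 0 == Rm; {12-9} Rn; {8} isAdd; {7-4} imm7_4; {3-0} imm3_0/Rm
static uint16_t packAddrMode3(bool IsImm8, unsigned Rn, bool Add,
                              unsigned Imm8OrRm) {
  return uint16_t((unsigned(IsImm8) << 13) | ((Rn & 0xF) << 9) |
                  (unsigned(Add) << 8) | (Imm8OrRm & 0xFF));
}

// e.g. [r2, #-20] packs as packAddrMode3(true, 2, false, 20) == 0x2414:
// the U bit is clear, Rn = 2, and 20 = 0x14 fills imm7_4:imm3_0.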
-class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + +class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { @@ -1680,6 +1752,13 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{11-8} = op11_8; let Inst{6} = op6; let Inst{4} = op4; +} + +class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : N3VCommon<op24, op23, op21_20, op11_8, op6, op4, + oops, iops, f, itin, opc, dt, asm, cstr, pattern> { // Instruction operands. bits<5> Vd; @@ -1694,6 +1773,47 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{5} = Vm{4}; } +class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : N3VCommon<op24, op23, op21_20, op11_8, op6, op4, + oops, iops, f, itin, opc, dt, asm, cstr, pattern> { + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + bit lane; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = lane; +} + +class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : N3VCommon<op24, op23, op21_20, op11_8, op6, op4, + oops, iops, f, itin, opc, dt, asm, cstr, pattern> { + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + bits<2> lane; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{2-0} = Vm{2-0}; + let Inst{5} = lane{1}; + let Inst{3} = lane{0}; +} + // Same as N3V except it doesn't have a data type suffix. 
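The two by-lane classes above differ only in where the scalar index goes: with 32-bit elements the full four-bit Vm field survives and the single lane bit lands in Inst{5}, while with 16-bit elements Vm shrinks to bits 2-0 and the two lane bits are split across Inst{5} and Inst{3}. Illustrative helpers for just those bit placements (not LLVM code):

#include <cstdint>

static uint32_t laneBits32(unsigned Vm, unsigned Lane) {   // N3VLane32
  return (Vm & 0xF) | ((Lane & 1) << 5);
}

static uint32_t laneBits16(unsigned Vm, unsigned Lane) {   // N3VLane16
  return (Vm & 0x7) | ((Lane & 1) << 3) | (((Lane >> 1) & 1) << 5);
}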
class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, @@ -1730,6 +1850,8 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, let Inst{11-8} = opcod2; let Inst{6-5} = opcod3; let Inst{4} = 1; + // A8.6.303, A8.6.328, A8.6.329 + let Inst{3-0} = 0b0000; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 6e3fe2e..209c1a3 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -58,7 +58,7 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 0, []>; def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; @@ -93,8 +93,6 @@ def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; -def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov, - [SDNPInGlue]>; def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -205,13 +203,13 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{ }]>; /// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. -def imm1_15 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16; +def imm1_15 : ImmLeaf<i32, [{ + return (int32_t)Imm >= 1 && (int32_t)Imm < 16; }]>; /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. -def imm16_31 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32; +def imm16_31 : ImmLeaf<i32, [{ + return (int32_t)Imm >= 16 && (int32_t)Imm < 32; }]>; def so_imm_neg : @@ -241,8 +239,8 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ /// imm0_65535 predicate - True if the 32-bit immediate is in the range /// [0.65535]. -def imm0_65535 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 65536; +def imm0_65535 : ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 65536; }]>; class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; @@ -377,17 +375,23 @@ def neon_vcvt_imm32 : Operand<i32> { } // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. -def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{ - int32_t v = (int32_t)N->getZExtValue(); +def rot_imm : Operand<i32>, ImmLeaf<i32, [{ + int32_t v = (int32_t)Imm; return v == 8 || v == 16 || v == 24; }]> { let EncoderMethod = "getRotImmOpValue"; } +def ShifterAsmOperand : AsmOperandClass { + let Name = "Shifter"; + let SuperClasses = []; +} + // shift_imm: An integer that encodes a shift amount and the type of shift // (currently either asr or lsl) using the same encoding used for the // immediates in so_reg operands. def shift_imm : Operand<i32> { let PrintMethod = "printShiftImmOperand"; + let ParserMatchClass = ShifterAsmOperand; } // shifter_operand operands: so_reg and so_imm. 
@@ -396,19 +400,21 @@ def so_reg : Operand<i32>, // reg reg imm [shl,srl,sra,rotr]> { let EncoderMethod = "getSORegOpValue"; let PrintMethod = "printSORegOperand"; - let MIOperandInfo = (ops GPR, GPR, i32imm); + let MIOperandInfo = (ops GPR, GPR, shift_imm); } def shift_so_reg : Operand<i32>, // reg reg imm ComplexPattern<i32, 3, "SelectShiftShifterOperandReg", [shl,srl,sra,rotr]> { let EncoderMethod = "getSORegOpValue"; let PrintMethod = "printSORegOperand"; - let MIOperandInfo = (ops GPR, GPR, i32imm); + let MIOperandInfo = (ops GPR, GPR, shift_imm); } // so_imm - Match a 32-bit shifter_operand immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits. -def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> { +def so_imm : Operand<i32>, ImmLeaf<i32, [{ + return ARM_AM::getSOImmVal(Imm) != -1; + }]> { let EncoderMethod = "getSOImmOpValue"; let PrintMethod = "printSOImmOperand"; } @@ -429,13 +435,13 @@ def arm_i32imm : PatLeaf<(imm), [{ }]>; /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. -def imm0_31 : Operand<i32>, PatLeaf<(imm), [{ - return (int32_t)N->getZExtValue() < 32; +def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 32; }]>; /// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'. -def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{ - return (int32_t)N->getZExtValue() < 32; +def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 32; }]> { let EncoderMethod = "getImmMinusOneOpValue"; } @@ -458,19 +464,30 @@ def bf_inv_mask_imm : Operand<i32>, } /// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p -def lsb_pos_imm : Operand<i32>, PatLeaf<(imm), [{ - return isInt<5>(N->getSExtValue()); +def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{ + return isInt<5>(Imm); }]>; /// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p -def width_imm : Operand<i32>, PatLeaf<(imm), [{ - return N->getSExtValue() > 0 && N->getSExtValue() <= 32; +def width_imm : Operand<i32>, ImmLeaf<i32, [{ + return Imm > 0 && Imm <= 32; }] > { let EncoderMethod = "getMsbOpValue"; } // Define ARM specific addressing modes. +def MemMode2AsmOperand : AsmOperandClass { + let Name = "MemMode2"; + let SuperClasses = []; + let ParserMethod = "tryParseMemMode2Operand"; +} + +def MemMode3AsmOperand : AsmOperandClass { + let Name = "MemMode3"; + let SuperClasses = []; + let ParserMethod = "tryParseMemMode3Operand"; +} // addrmode_imm12 := reg +/- imm12 // @@ -501,6 +518,7 @@ def addrmode2 : Operand<i32>, ComplexPattern<i32, 3, "SelectAddrMode2", []> { let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; + let ParserMatchClass = MemMode2AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } @@ -519,6 +537,7 @@ def addrmode3 : Operand<i32>, ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; let PrintMethod = "printAddrMode3Operand"; + let ParserMatchClass = MemMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } @@ -586,6 +605,21 @@ def addrmodepc : Operand<i32>, let MIOperandInfo = (ops GPR, i32imm); } +def MemMode7AsmOperand : AsmOperandClass { + let Name = "MemMode7"; + let SuperClasses = []; +} + +// addrmode7 := reg +// Used by load/store exclusive instructions. Useful to enable right assembly +// parsing and printing. Not used for any codegen matching. 
+// +def addrmode7 : Operand<i32> { + let PrintMethod = "printAddrMode7Operand"; + let MIOperandInfo = (ops GPR); + let ParserMatchClass = MemMode7AsmOperand; +} + def nohash_imm : Operand<i32> { let PrintMethod = "printNoHashImmediate"; } @@ -902,52 +936,23 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{19-16} = Rn; } } +} + // Carry setting variants -let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0> { - def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"), - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; - let Inst{20} = 1; - let Inst{25} = 1; - } - def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"), - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>; + def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + Size4Bytes, IIC_iALUr, + [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { let isCommutable = Commutable; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{20} = 1; - let Inst{25} = 0; - } - def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"), - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{20} = 1; - let Inst{25} = 0; } -} + def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>; } } @@ -972,6 +977,7 @@ multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii, [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> { bits<4> Rt; bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 let Inst{23} = shift{12}; // U (add = ('U' == 1)) let Inst{19-16} = shift{16-13}; // Rn let Inst{15-12} = Rt; @@ -1001,6 +1007,7 @@ multiclass AI_str1<bit isByte, string opc, InstrItinClass iii, [(opnode GPR:$Rt, ldst_so_reg:$shift)]> { bits<4> Rt; bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 let Inst{23} = shift{12}; // U (add = ('U' == 1)) let Inst{19-16} = shift{16-13}; // Rn let Inst{15-12} = Rt; @@ -1249,7 +1256,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in // The 'adr' mnemonic encodes differently if the label is before or after // the instruction. The {24-21} opcode bits are set by the fixup, as we don't // know until then which form of the instruction will be used. -def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label), +def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> { bits<4> Rd; bits<12> label; @@ -1311,6 +1318,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // before calls from potentially appearing dead. 
let isCall = 1, // On non-Darwin platforms R9 is callee-saved. + // FIXME: Do we really need a non-predicated version? If so, it should + // at least be a pseudo instruction expanding to the predicated version + // at MC lowering time. Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -1340,7 +1350,16 @@ let isCall = 1, Requires<[IsARM, HasV5T, IsNotDarwin]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; - let Inst{3-0} = func; + let Inst{3-0} = func; + } + + def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + IIC_Br, "blx", "\t$func", + [(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T, IsNotDarwin]> { + bits<4> func; + let Inst{27-4} = 0b000100101111111111110011; + let Inst{3-0} = func; } // ARMv4T @@ -1364,30 +1383,25 @@ let isCall = 1, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR], Uses = [R7, SP] in { - def BLr9 : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops), - IIC_Br, "bl\t$func", - [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> { - let Inst{31-28} = 0b1110; - bits<24> func; - let Inst{23-0} = func; - } + def BLr9 : ARMPseudoInst<(outs), (ins bltarget:$func, variable_ops), + Size4Bytes, IIC_Br, + [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>; - def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops), - IIC_Br, "bl", "\t$func", + def BLr9_pred : ARMPseudoInst<(outs), + (ins bltarget:$func, pred:$p, variable_ops), + Size4Bytes, IIC_Br, [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsDarwin]> { - bits<24> func; - let Inst{23-0} = func; - } + Requires<[IsARM, IsDarwin]>; // ARMv5T and above - def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, - IIC_Br, "blx\t$func", - [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> { - bits<4> func; - let Inst{31-4} = 0b1110000100101111111111110011; - let Inst{3-0} = func; - } + def BLXr9 : ARMPseudoInst<(outs), (ins GPR:$func, variable_ops), + Size4Bytes, IIC_Br, + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]>; + + def BLXr9_pred: ARMPseudoInst<(outs), (ins GPR:$func, pred:$p, variable_ops), + Size4Bytes, IIC_Br, + [(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T, IsDarwin]>; // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. @@ -1403,11 +1417,7 @@ let isCall = 1, // Tail calls. -// FIXME: These should probably be xformed into the non-TC versions of the -// instructions as part of MC lowering. -// FIXME: These seem to be used for both Thumb and ARM instruction selection. -// Thumb should have its own version since the instruction is actually -// different, even though the mnemonic is the same. +// FIXME: The Thumb versions of these should live in ARMInstrThumb.td let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Darwin versions. 
let Defs = [R0, R1, R2, R3, R9, R12, @@ -1421,21 +1431,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), IIC_Br, []>, Requires<[IsDarwin]>; - def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b\t$dst @ TAILCALL", + def TAILJMPd : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsARM, IsDarwin]>; - def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b.w\t$dst @ TAILCALL", + def tTAILJMPd: tPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsThumb, IsDarwin]>; - def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops), - BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", - []>, Requires<[IsDarwin]> { - bits<4> dst; - let Inst{31-4} = 0b1110000100101111111111110001; - let Inst{3-0} = dst; - } + def TAILJMPr : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsARM, IsDarwin]>; + + def tTAILJMPr : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsThumb, IsDarwin]>; } // Non-Darwin versions (the difference is R9). @@ -1450,34 +1460,31 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), IIC_Br, []>, Requires<[IsNotDarwin]>; - def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b\t$dst @ TAILCALL", + def TAILJMPdND : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsARM, IsNotDarwin]>; - def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b.w\t$dst @ TAILCALL", + def tTAILJMPdND : tPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsThumb, IsNotDarwin]>; - def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops), - BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", - []>, Requires<[IsNotDarwin]> { - bits<4> dst; - let Inst{31-4} = 0b1110000100101111111111110001; - let Inst{3-0} = dst; - } + def TAILJMPrND : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsARM, IsNotDarwin]>; + def tTAILJMPrND : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsThumb, IsNotDarwin]>; } } let isBranch = 1, isTerminator = 1 in { - // B is "predicable" since it can be xformed into a Bcc. + // B is "predicable" since it's just a Bcc with an 'always' condition. let isBarrier = 1 in { let isPredicable = 1 in - def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br, - "b\t$target", [(br bb:$target)]> { - bits<24> target; - let Inst{31-28} = 0b1110; - let Inst{23-0} = target; - } + // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly + // should be sufficient. 
+ def B : ARMPseudoInst<(outs), (ins brtarget:$target), Size4Bytes, IIC_Br, + [(br bb:$target)]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), @@ -1509,6 +1516,16 @@ let isBranch = 1, isTerminator = 1 in { } } +// BLX (immediate) -- for disassembly only +def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary, + "blx\t$target", [/* pattern left blank */]>, + Requires<[IsARM, HasV5T]> { + let Inst{31-25} = 0b1111101; + bits<25> target; + let Inst{23-0} = target{24-1}; + let Inst{24} = target{0}; +} + // Branch and Exchange Jazelle -- for disassembly only def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", [/* For disassembly only; pattern left blank */]> { @@ -1533,6 +1550,7 @@ def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", let Inst{23-0} = svc; } } +def : MnemonicAlias<"swi", "svc">; // Store Return State is a system instruction -- for disassembly only let isCodeGenOnly = 1 in { // FIXME: This should not use submode! @@ -1541,6 +1559,8 @@ def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b110; // W = 1 + let Inst{19-8} = 0xd05; + let Inst{7-5} = 0b000; } def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), @@ -1548,6 +1568,8 @@ def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b100; // W = 0 + let Inst{19-8} = 0xd05; + let Inst{7-5} = 0b000; } // Return From Exception is a system instruction -- for disassembly only @@ -1556,6 +1578,7 @@ def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b011; // W = 1 + let Inst{15-0} = 0x0a00; } def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), @@ -1563,6 +1586,7 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b001; // W = 0 + let Inst{15-0} = 0x0a00; } } // isCodeGenOnly = 1 @@ -1610,15 +1634,11 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, - isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring? -// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1) -// how to represent that such that tblgen is happy and we don't -// mark this codegen only? 
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr", + IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr", []>, Requires<[IsARM, HasV5TE]>; } @@ -1636,6 +1656,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> { let Inst{23} = addr{12}; let Inst{19-16} = addr{17-14}; let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; } def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins GPR:$Rn, am2offset:$offset), @@ -1688,40 +1709,80 @@ let mayLoad = 1, neverHasSideEffects = 1 in { defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>; defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>; defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>; -let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -defm LDRD : AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>; +let hasExtraDefRegAllocReq = 1 in { +def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins addrmode3:$addr), IndexModePre, + LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm +} +def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins GPR:$Rn, am3offset:$offset), IndexModePost, + LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, [$Rn], $offset", + "$Rn = $Rn_wb", []> { + bits<10> offset; + bits<4> Rn; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = Rn; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm +} +} // hasExtraDefRegAllocReq = 1 } // mayLoad = 1, neverHasSideEffects = 1 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. 
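In plain C terms, the difference between the new LDRD_PRE and LDRD_POST definitions above is only when the base register is written back: before the access for the pre-indexed form, after it for the post-indexed form. A model assuming little-endian data and the usual Rt/Rt2 pairing; load32, ldrdPre and ldrdPost are illustrative names:

#include <cstdint>
#include <cstring>

static uint32_t load32(const uint8_t *P) {
  uint32_t V;
  std::memcpy(&V, P, 4);
  return V;
}

// ldrd rt, rt2, [rn, #off]!   -- base updated, then both words loaded
static uint64_t ldrdPre(const uint8_t *&Rn, int Off) {
  Rn += Off;
  return load32(Rn) | (uint64_t(load32(Rn + 4)) << 32);
}

// ldrd rt, rt2, [rn], #off    -- both words loaded, then base updated
static uint64_t ldrdPost(const uint8_t *&Rn, int Off) {
  uint64_t V = load32(Rn) | (uint64_t(load32(Rn + 4)) << 32);
  Rn += Off;
  return V;
}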
let mayLoad = 1, neverHasSideEffects = 1 in { -def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), IndexModeNone, - LdFrm, IIC_iLoad_ru, - "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; let Inst{21} = 1; // overwrite -} -def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), IndexModeNone, - LdFrm, IIC_iLoad_bh_ru, - "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; +} +def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; let Inst{21} = 1; // overwrite + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; } -def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } -def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } -def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } } @@ -1734,55 +1795,61 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; // Store doubleword -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1, - isCodeGenOnly = 1 in // $src2 doesn't exist in asm string -def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in +def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStore_d_r, - "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; + "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb), 
(ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePre, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "str", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePost, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "str", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePre, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "strb", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePost, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "strb", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), IndexModePre, StMiscFrm, IIC_iStore_ru, - "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "strh", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "strh", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; // For disassembly only +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), StMiscFrm, IIC_iStore_d_ru, @@ -1795,31 +1862,32 @@ def STRD_POST: AI3stdpo<(outs GPR:$base_wb), StMiscFrm, IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base], $offset", "$base = $base_wb", []>; +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 // STRT, STRBT, and STRHT are for disassembly only. 
-def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn,am2offset:$offset), - IndexModeNone, StFrm, IIC_iStore_ru, - "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { +def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb", + [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; } -def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModeNone, StFrm, IIC_iStore_bh_ru, - "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { +def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb", + [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; } -def STRHT: AI3sthpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), +def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, IIC_iStore_bh_ru, - "strht", "\t$src, [$base], $offset", "$base = $base_wb", + "strht", "\t$Rt, $addr", "$addr.base = $base_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode3"; } //===----------------------------------------------------------------------===// @@ -1892,7 +1960,7 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{21} = 1; // Writeback let Inst{20} = L_bit; } -} +} let neverHasSideEffects = 1 in { @@ -1912,16 +1980,10 @@ def : MnemonicAlias<"stm", "stmia">; // FIXME: Should pc be an implicit operand like PICADD, etc? let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -// FIXME: Should be a pseudo-instruction. -def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, - reglist:$regs, variable_ops), - IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr, - "ldmia${p}\t$Rn!, $regs", - "$Rn = $wb", []> { - let Inst{24-23} = 0b01; // Increment After - let Inst{21} = 1; // Writeback - let Inst{20} = 1; // Load -} +def LDMIA_RET : ARMPseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + Size4Bytes, IIC_iLoad_mBr, []>, + RegConstraint<"$Rn = $wb">; //===----------------------------------------------------------------------===// // Move Instructions. @@ -1933,6 +1995,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, bits<4> Rd; bits<4> Rm; + let Inst{19-16} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{25} = 0; let Inst{3-0} = Rm; @@ -1959,6 +2022,7 @@ def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src), bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; let Inst{11-0} = src; let Inst{25} = 0; } @@ -2145,10 +2209,12 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc", BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; // ADC and SUBC with 's' bit set. 
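The STRT/STRBT rewrites above reuse the same 18-bit addrmode2 machine operand as the LDRT family: Rn in bits 17-14, a register-versus-imm12 flag in bit 13, the add/subtract bit in bit 12, and the offset or Rm in bits 11-0. A packing sketch; packAddrMode2 is an illustrative name:

#include <cstdint>

static uint32_t packAddrMode2(unsigned Rn, bool IsReg, bool Add,
                              unsigned Imm12OrRm) {
  return ((Rn & 0xF) << 14) | (unsigned(IsReg) << 13) |
         (unsigned(Add) << 12) | (Imm12OrRm & 0xFFF);
}

// e.g. the post-indexed [r1], #8 packs as
// packAddrMode2(1, false, true, 8) == 0x5008.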
-defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; +let usesCustomInserter = 1 in { +defm ADCS : AI1_adde_sube_s_irs< + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm SBCS : AI1_adde_sube_s_irs< + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; +} def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm", @@ -2190,31 +2256,17 @@ def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), } // RSB with 's' bit set. -let isCodeGenOnly = 1, Defs = [CPSR] in { -def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} -def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>; +def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + Size4Bytes, IIC_iALUr, + [/* For disassembly only; pattern left blank */]>; +def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>; } let Uses = [CPSR] in { @@ -2258,34 +2310,14 @@ def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), } } -// FIXME: Allow these to be predicated. -let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in { -def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} -def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1, Uses = [CPSR] in { +def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>; +def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>; } // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. @@ -2300,8 +2332,10 @@ def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), // The with-carry-in form matches bitwise not instead of the negation. 
// Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. -def : ARMPat<(adde GPR:$src, so_imm_not:$imm), +def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm), (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm), + (SBCSri GPR:$src, so_imm_not:$imm)>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -2617,14 +2651,16 @@ def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]> { + let Inst{15-12} = 0b0000; +} } let Constraints = "@earlyclobber $Rd" in def MLAv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s), - Size4Bytes, IIC_iMAC32, - [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, + Size4Bytes, IIC_iMAC32, + [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, Requires<[IsARM, NoV6]> { bits<4> Ra; let Inst{15-12} = Ra; @@ -2657,7 +2693,7 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in { let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMUL64, []>, Requires<[IsARM, NoV6]>; @@ -2681,15 +2717,15 @@ def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), // Multiply + accumulate let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, Requires<[IsARM, NoV6]>; def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, Requires<[IsARM, NoV6]>; def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, Requires<[IsARM, NoV6]>; @@ -2970,17 +3006,25 @@ def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", [(set GPR:$Rd, (sext_inreg - (or (srl (and GPR:$Rm, 0xFF00), (i32 8)), + (or (srl GPR:$Rm, (i32 8)), (shl GPR:$Rm, (i32 8))), i16))]>, Requires<[IsARM, HasV6]>; +def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)), + (shl GPR:$Rm, (i32 8))), i16), + (REVSH GPR:$Rm)>; + +// Need the AddedComplexity or else MOVs + REV would be chosen. 
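The pair of SBC patterns just above rests on a mod-2^32 identity: a + ~b + carry == a - b - (1 - carry). An add-with-carry of the bitwise NOT is therefore exactly a subtract under ARM's inverted-borrow convention, with adde_dead_carry taking the plain SBCri and adde_live_carry the flag-setting SBCSri. A quick numeric check of the identity:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x12345678, B = 0x00001111;
  for (uint32_t Carry = 0; Carry <= 1; ++Carry)
    assert(uint32_t(A + ~B + Carry) == uint32_t(A - B - (1 - Carry)));
}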
+let AddedComplexity = 5 in +def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>; + def lsl_shift_imm : SDNodeXForm<imm, [{ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue()); return CurDAG->getTargetConstant(Sh, MVT::i32); }]>; -def lsl_amt : PatLeaf<(i32 imm), [{ - return (N->getZExtValue() < 32); +def lsl_amt : ImmLeaf<i32, [{ + return Imm > 0 && Imm < 32; }], lsl_shift_imm>; def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd), @@ -3002,8 +3046,8 @@ def asr_shift_imm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(Sh, MVT::i32); }]>; -def asr_amt : PatLeaf<(i32 imm), [{ - return (N->getZExtValue() <= 32); +def asr_amt : ImmLeaf<i32, [{ + return Imm > 0 && Imm <= 32; }], asr_shift_imm>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and @@ -3119,88 +3163,43 @@ def BCCZi64 : PseudoInst<(outs), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( -// FIXME: These should all be pseudo-instructions that get expanded to -// the normal MOV instructions. That would fix the dependency on -// special casing them in tblgen. let neverHasSideEffects = 1 in { -def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm, - IIC_iCMOVr, "mov", "\t$Rd, $Rm", - [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<4> Rm; - let Inst{25} = 0; - let Inst{20} = 0; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; -} - -def MOVCCs : AI1<0b1101, (outs GPR:$Rd), - (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr, - "mov", "\t$Rd, $shift", - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 0; - let Inst{19-16} = 0; - let Inst{15-12} = Rd; - let Inst{11-0} = shift; -} +def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), + Size4Bytes, IIC_iCMOVr, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">; +def MOVCCs : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg:$shift, pred:$p), + Size4Bytes, IIC_iCMOVsr, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">; let isMoveImm = 1 in -def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm), - DPFrm, IIC_iMOVi, - "movw", "\t$Rd, $imm", - []>, - RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, - UnaryDP { - bits<4> Rd; - bits<16> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = imm{15-12}; - let Inst{15-12} = Rd; - let Inst{11-0} = imm{11-0}; -} +def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm_hilo16:$imm, pred:$p), + Size4Bytes, IIC_iMOVi, + []>, + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; let isMoveImm = 1 in -def MOVCCi : AI1<0b1101, (outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, - "mov", "\t$Rd, $imm", +def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_imm:$imm, pred:$p), + Size4Bytes, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; -} + 
RegConstraint<"$false = $Rd">; // Two instruction predicate mov immediate. let isMoveImm = 1 in -def MOVCCi32imm : PseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm:$src, pred:$p), - IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; +def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm:$src, pred:$p), + Size8Bytes, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; let isMoveImm = 1 in -def MVNCCi : AI1<0b1111, (outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, - "mvn", "\t$Rd, $imm", +def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_imm:$imm, pred:$p), + Size4Bytes, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; -} + RegConstraint<"$false = $Rd">; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -3221,13 +3220,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{31-4} = 0xf57ff05; let Inst{3-0} = opt; } - -def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, - "mcr", "\tp15, 0, $zero, c7, c10, 5", - [(ARMMemBarrierMCR GPR:$zero)]>, - Requires<[IsARM, HasV6]> { - // FIXME: add encoding -} } def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, @@ -3266,6 +3258,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; @@ -3284,6 +3288,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; @@ -3302,6 +3318,18 @@ let usesCustomInserter = 1 in { def 
ATOMIC_LOAD_NAND_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_SWAP_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, @@ -3326,39 +3354,26 @@ let usesCustomInserter = 1 in { } let mayLoad = 1 in { -def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrexb", "\t$Rt, [$Rn]", - []>; -def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrexh", "\t$Rt, [$Rn]", - []>; -def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrex", "\t$Rt, [$Rn]", - []>; -def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn), - NoItinerary, - "ldrexd", "\t$Rt, $Rt2, [$Rn]", - []>; +def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrexb", "\t$Rt, $addr", []>; +def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrexh", "\t$Rt, $addr", []>; +def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrex", "\t$Rt, $addr", []>; +def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr), + NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>; } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn), - NoItinerary, - "strexb", "\t$Rd, $src, [$Rn]", - []>; -def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn), - NoItinerary, - "strexh", "\t$Rd, $Rt, [$Rn]", - []>; -def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn), - NoItinerary, - "strex", "\t$Rd, $Rt, [$Rn]", - []>; +def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>; +def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; +def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; def STREXD : AIstrex<0b01, (outs GPR:$Rd), - (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn), - NoItinerary, - "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", - []>; + (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr), + NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>; } // Clear-Exclusive is for disassembly only. @@ -3377,238 +3392,7 @@ def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb", } //===----------------------------------------------------------------------===// -// TLS Instructions -// - -// __aeabi_read_tp preserves the registers r1-r3. -// This is a pseudo inst so that we can get the encoding right, -// complete with fixup for the aeabi_read_tp function. 
-let isCall = 1, - Defs = [R0, R12, LR, CPSR], Uses = [SP] in { - def TPsoft : PseudoInst<(outs), (ins), IIC_Br, - [(set R0, ARMthread_pointer)]>; -} - -//===----------------------------------------------------------------------===// -// SJLJ Exception handling intrinsics -// eh_sjlj_setjmp() is an instruction sequence to store the return -// address and save #0 in R0 for the non-longjmp case. -// Since by its nature we may be coming from some other function to get -// here, and we're using the stack frame for the containing function to -// save/restore registers, we can't keep anything live in regs across -// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon -// when we get here from a longjmp(). We force everthing out of registers -// except for our own input by listing the relevant registers in Defs. By -// doing so, we also cause the prologue/epilogue code to actively preserve -// all of the callee-saved resgisters, which is exactly what we want. -// A constant value is passed in $val, and we use the location as a scratch. -// -// These are pseudo-instructions and are lowered to individual MC-insts, so -// no encoding information is necessary. -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, - D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ], hasSideEffects = 1, isBarrier = 1 in { - def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), - NoItinerary, - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, - Requires<[IsARM, HasVFP2]>; -} - -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], - hasSideEffects = 1, isBarrier = 1 in { - def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), - NoItinerary, - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, - Requires<[IsARM, NoVFP]>; -} - -// FIXME: Non-Darwin version(s) -let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, - Defs = [ R7, LR, SP ] in { -def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), - NoItinerary, - [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsARM, IsDarwin]>; -} - -// eh.sjlj.dispatchsetup pseudo-instruction. -// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are -// handled when the pseudo is expanded (which happens before any passes -// that need the instruction size). -let isBarrier = 1, hasSideEffects = 1 in -def Int_eh_sjlj_dispatchsetup : - PseudoInst<(outs), (ins GPR:$src), NoItinerary, - [(ARMeh_sjlj_dispatchsetup GPR:$src)]>, - Requires<[IsDarwin]>; - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -// - -// Large immediate handling. - -// 32-bit immediate using two piece so_imms or movw + movt. -// This is a single pseudo instruction, the benefit is that it can be remat'd -// as a single unit instead of having to handle reg inputs. -// FIXME: Remove this when we can do generalized remat. -let isReMaterializable = 1, isMoveImm = 1 in -def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, - [(set GPR:$dst, (arm_i32imm:$src))]>, - Requires<[IsARM]>; - -// Pseudo instruction that combines movw + movt + add pc (if PIC). -// It also makes it possible to rematerialize the instructions. -// FIXME: Remove this when we can do generalized remat and when machine licm -// can properly the instructions. 
-let isReMaterializable = 1 in { -def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2addpc, - [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsARM, UseMovt]>; - -def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2, - [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, - Requires<[IsARM, UseMovt]>; - -let AddedComplexity = 10 in -def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2ld, - [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, - Requires<[IsARM, UseMovt]>; -} // isReMaterializable - -// ConstantPool, GlobalAddress, and JumpTable -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, - Requires<[IsARM, DontUseMovt]>; -def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, - Requires<[IsARM, UseMovt]>; -def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), - (LEApcrelJT tjumptable:$dst, imm:$id)>; - -// TODO: add,sub,and, 3-instr forms? - -// Tail calls -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; - -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; - -// Direct calls -def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsARM, IsNotDarwin]>; -def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsARM, IsDarwin]>; - -// zextload i1 -> zextload i8 -def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; - -// extload -> zextload -def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; -def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; - -def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; - -def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; -def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; - -// smul* and smla* -def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16))), - (SMULBB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), - (SMULBB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16))), - (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), - (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16)), - (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), 
- (SMULWB GPR:$a, GPR:$b)>; - -def : ARMV5TEPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16)))), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul sext_16_node:$a, sext_16_node:$b)), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul (sra GPR:$a, (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16)))), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; - -//===----------------------------------------------------------------------===// -// Thumb Support -// - -include "ARMInstrThumb.td" - -//===----------------------------------------------------------------------===// -// Thumb2 Support -// - -include "ARMInstrThumb2.td" - -//===----------------------------------------------------------------------===// -// Floating Point Support -// - -include "ARMInstrVFP.td" - -//===----------------------------------------------------------------------===// -// Advanced SIMD (NEON) Support -// - -include "ARMInstrNEON.td" - -//===----------------------------------------------------------------------===// -// Coprocessor Instructions. For disassembly only. +// Coprocessor Instructions. 
// def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, @@ -3652,17 +3436,18 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, let Inst{23-20} = opc1; } -class ACI<dag oops, dag iops, string opc, string asm> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary, - opc, asm, "", [/* For disassembly only; pattern left blank */]> { +class ACI<dag oops, dag iops, string opc, string asm, + IndexMode im = IndexModeNone> + : InoP<oops, iops, AddrModeNone, Size4Bytes, im, BrFrm, NoItinerary, + opc, asm, "", [/* For disassembly only; pattern left blank */]> { let Inst{27-25} = 0b110; } -multiclass LdStCop<bits<4> op31_28, bit load, string opc> { +multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{ def _OFFSET : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "\tp$cop, cr$CRd, $addr"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 0; // W = 0 @@ -3671,8 +3456,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def _PRE : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "\tp$cop, cr$CRd, $addr!"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 1; // W = 1 @@ -3681,8 +3466,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def _POST : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - opc, "\tp$cop, cr$CRd, [$base], $offset"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{21} = 1; // W = 1 @@ -3691,8 +3476,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def _OPTION : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option), - opc, "\tp$cop, cr$CRd, [$base], $option"> { + !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option), + ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 @@ -3702,8 +3488,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def L_OFFSET : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 0; // W = 0 @@ -3712,8 +3498,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def L_PRE : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!", + IndexModePre> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 1; // W = 1 @@ -3722,8 +3509,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def L_POST : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, 
[$base], $offset"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr", + IndexModePost> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{21} = 1; // W = 1 @@ -3732,8 +3520,10 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def L_OPTION : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option), + ops), + !strconcat(!strconcat(opc, "l"), cond), + "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 @@ -3743,19 +3533,18 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } } -defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">; -defm LDC2 : LdStCop<0b1111, 1, "ldc2">; -defm STC : LdStCop<{?,?,?,?}, 0, "stc">; -defm STC2 : LdStCop<0b1111, 0, "stc2">; +defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">; +defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">; +defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">; +defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">; //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register -- for disassembly only // -class MovRCopro<string opc, bit direction> - : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", +class MovRCopro<string opc, bit direction, dag oops, dag iops> + : ABI<0b1110, oops, iops, NoItinerary, opc, + "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", [/* For disassembly only; pattern left blank */]> { let Inst{20} = direction; let Inst{4} = 1; @@ -3775,13 +3564,17 @@ class MovRCopro<string opc, bit direction> let Inst{19-16} = CRn; } -def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; -def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; +def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; +def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRn, c_imm:$CRm, i32imm:$opc2)>; -class MovRCopro2<string opc, bit direction> - : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), +class MovRCopro2<string opc, bit direction, dag oops, dag iops> + : ABXI<0b1110, oops, iops, NoItinerary, + !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{20} = direction; @@ -3802,8 +3595,14 @@ class MovRCopro2<string opc, bit direction> let Inst{19-16} = CRn; } -def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>; -def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>; +def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; +def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; class 
MovRRCopro<string opc, bit direction>
  : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
@@ -3909,3 +3708,241 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
   let Inst{15-12} = 0b1111;
   let Inst{11-0} = a;
 }
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+  Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+  def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+               [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR,  D0,
+    D1,  D2,  D3,  D4,  D5,  D6,  D7,  D8,  D9,  D10, D11, D12, D13, D14, D15,
+    D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+    D31 ], hasSideEffects = 1, isBarrier = 1 in {
+  def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                             NoItinerary,
+                             [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+                           Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+  [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+  hasSideEffects = 1, isBarrier = 1 in {
+  def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                                  NoItinerary,
+                                  [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+                                Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+    Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+                            NoItinerary,
+                            [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+                          Requires<[IsARM, IsDarwin]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+  PseudoInst<(outs), (ins), NoItinerary,
+             [(ARMeh_sjlj_dispatchsetup)]>,
+  Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Large immediate handling.
+
+// 32-bit immediate using two piece so_imms or movw + movt.
+// This is a single pseudo instruction; the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+                           [(set GPR:$dst, (arm_i32imm:$src))]>,
+                Requires<[IsARM]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly handle the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                              IIC_iMOVix2addpc,
+                              [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+                   Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                            IIC_iMOVix2,
+                            [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+                 Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                                  IIC_iMOVix2ld,
+                                  [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+                       Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+      Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+      Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+             (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+             (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+             (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+             (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+             (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+             (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+             (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
+      Requires<[IsARM, IsNotDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
+      Requires<[IsARM, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+                      (sra (shl GPR:$b, (i32 16)), (i32 16))),
+                 (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+                 (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+                      (sra GPR:$b, (i32 16))),
+                 (SMULBT GPR:$a,
GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16))), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), + (i32 16)), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), + (SMULWB GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16)))), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra GPR:$b, (i32 16)))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16)))), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), + (i32 16))), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; + + +// Pre-v7 uses MCR for synchronization barriers. +def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, + Requires<[IsARM, HasV6]>; + + +//===----------------------------------------------------------------------===// +// Thumb Support +// + +include "ARMInstrThumb.td" + +//===----------------------------------------------------------------------===// +// Thumb2 Support +// + +include "ARMInstrThumb2.td" + +//===----------------------------------------------------------------------===// +// Floating Point Support +// + +include "ARMInstrVFP.td" + +//===----------------------------------------------------------------------===// +// Advanced SIMD (NEON) Support +// + +include "ARMInstrNEON.td" + diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index dc3d63e..e34d69a 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -80,6 +80,12 @@ def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; +def NEONvbsl : SDNode<"ARMISD::VBSL", + SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>>; + def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -146,10 +152,6 @@ def VLDMQIA : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; -def VLDMQDB - : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), - IIC_fpLoad_m, "", - [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. 
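
// A minimal sketch of the pseudo-spill idiom the VLDMQIA/VSTMQIA comments
// above describe (hypothetical name VSTMQIA_sketch; the real def appears in
// the next hunk). The pseudo carries only a selection pattern and no
// encoding; after register allocation it is rewritten to a real VSTMDIA
// covering the two D subregisters of $src, e.g. "vstmia r0, {d0, d1}" when
// $src is assigned Q0.
def VSTMQIA_sketch : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
                                    IIC_fpStore_m, "",
                                    [(store (v2f64 QPR:$src), GPR:$Rn)]>;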
@@ -157,10 +159,6 @@ def VSTMQIA : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), IIC_fpStore_m, "", [(store (v2f64 QPR:$src), GPR:$Rn)]>; -def VSTMQDB - : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), - IIC_fpStore_m, "", - [(store (v2f64 QPR:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -1801,7 +1799,7 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VDSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1811,7 +1809,7 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8, } class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", [(set (Ty DPR:$Vd), @@ -1841,7 +1839,7 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VQSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1852,7 +1850,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8, } class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", [(set (ResTy QPR:$Vd), @@ -1874,7 +1872,7 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, } class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1885,7 +1883,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1915,7 +1913,7 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1927,7 +1925,7 @@ class 
N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1959,7 +1957,7 @@ class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -1972,7 +1970,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -1994,7 +1992,7 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -2008,7 +2006,7 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -2069,7 +2067,7 @@ class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), + : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", @@ -2081,7 +2079,7 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), + : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", @@ -2116,7 +2114,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ 
-2129,7 +2127,7 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -2164,7 +2162,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set QPR:$Vd, @@ -2173,7 +2171,7 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set QPR:$Vd, @@ -2219,7 +2217,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -2229,7 +2227,7 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -2288,17 +2286,17 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. 
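
// Aside on the ImmTy parameter threaded through the shift classes below: a
// sketch of the kind of range-checked operand the right-shift forms take,
// with an illustrative name and predicate (the patch's actual shr_imm8
// definition lives elsewhere in this commit). NEON vshr encodes a size-8
// right shift amount in [1,8]; the left-shift forms, whose range is [0,7],
// keep the plain i32imm operand.
def shr_imm8_sketch : Operand<i32>, ImmLeaf<i32, [{
  return Imm > 0 && Imm <= 8;  // right-shift range for 8-bit elements
}]>;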
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode OpNode> + Format f, InstrItinClass itin, Operand ImmTy, + string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, 0, op4, - (outs DPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), f, itin, + (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>; class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode OpNode> + Format f, InstrItinClass itin, Operand ImmTy, + string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, 1, op4, - (outs QPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), f, itin, + (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2315,9 +2313,9 @@ class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, // Narrow shift by immediate. class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs DPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, itin, + (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2325,16 +2323,18 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, // Shift right by immediate and accumulate, // both double- and quad-register. class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Operand ImmTy, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, + (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set DPR:$Vd, (Ty (add DPR:$src1, (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>; class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Operand ImmTy, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, + (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (add QPR:$src1, (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; @@ -2342,15 +2342,17 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Shift by immediate and insert, // both double- and quad-register. 
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> + Operand ImmTy, Format f, string OpcodeStr, string Dt, + ValueType Ty,SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD, + (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>; class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> + Operand ImmTy, Format f, string OpcodeStr, string Dt, + ValueType Ty,SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ, + (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; @@ -3010,40 +3012,77 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, // Neon 2-register vector shift by immediate, // with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: -multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode, Format f> { +multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; + // imm6 = xxxxxx +} +multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { // 64-bit vector types. 
- def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, + def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, + def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, + def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin, + def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, + def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, + def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, + def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin, + def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx } @@ -3053,79 +3092,113 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, + def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, + def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16, OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, + def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32, OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, + def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>; // imm6 = xxxxxx // 128-bit vector types. 
- def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, + def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, + def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16, OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, + def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32, OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, + def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>; // imm6 = xxxxxx } - // Neon Shift-Insert vector operations, // with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: -multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, SDNode ShOp, - Format f> { +multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr> { + // 64-bit vector types. + def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>; + // imm6 = xxxxxx +} +multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr> { // 64-bit vector types. - def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "8", v8i8, ShOp> { + def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8, + N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "16", v4i16, ShOp> { + def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16, + N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "32", v2i32, ShOp> { + def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32, + N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, - f, OpcodeStr, "64", v1i64, ShOp>; + def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64, + N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>; // imm6 = xxxxxx // 128-bit vector types. 
- def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "8", v16i8, ShOp> { + def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8, + N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "16", v8i16, ShOp> { + def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16, + N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "32", v4i32, ShOp> { + def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32, + N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, - f, OpcodeStr, "64", v2i64, ShOp>; + def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64, + N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>; // imm6 = xxxxxx } @@ -3153,15 +3226,18 @@ multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, SDNode OpNode> { def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, - OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> { + OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, shr_imm8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, - OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> { + OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, shr_imm16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, - OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> { + OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, shr_imm32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -3697,16 +3773,21 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VCNTiD, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set DPR:$Vd, - (v2i32 (or (and DPR:$Vn, DPR:$src1), - (and DPR:$Vm, (vnotd DPR:$src1)))))]>; + [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; + +def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VCNTiQ, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set QPR:$Vd, - (v4i32 (or (and QPR:$Vn, QPR:$src1), - (and QPR:$Vm, (vnotq QPR:$src1)))))]>; + [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; + +def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -3917,14 +3998,13 @@ defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm, defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu>; + // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl, - N2RegVShLFrm>; +defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; + // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs, - N2RegVShRFrm>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru, - N2RegVShRFrm>; +defm VSHRs : N2VShR_QHSD<0, 1, 
0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -3957,10 +4037,8 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs, - N2RegVShRFrm>; -defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru, - N2RegVShRFrm>; +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -3974,13 +4052,11 @@ defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls, - N2RegVShLFrm>; -defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu, - N2RegVShLFrm>; +defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>; +defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>; + // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu, - N2RegVShLFrm>; +defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", @@ -4018,9 +4094,10 @@ defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; // VSLI : Vector Shift Left and Insert -defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>; +defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; + // VSRI : Vector Shift Right and Insert -defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>; +defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; // Vector Absolute and Saturating Absolute. 
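
// The hunks below drop the duplicated "f" flavors (VDUPfd, VDUPLNfd,
// VREV64df, ...): v2f32/v4f32 share their D/Q register bit layout with
// v2i32/v4i32, so the existing integer encodings already perform the float
// operation and a selection pattern suffices. A minimal sketch of the idiom,
// using a pattern that appears verbatim in the VREV64 hunk below:
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;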
@@ -4362,14 +4439,8 @@ def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R), - IIC_VMOVIS, "vdup", "32", "$V, $R", - [(set DPR:$V, (v2f32 (NEONvdup - (f32 (bitconvert GPR:$R)))))]>; -def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R), - IIC_VMOVIS, "vdup", "32", "$V, $R", - [(set QPR:$V, (v4f32 (NEONvdup - (f32 (bitconvert GPR:$R)))))]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; // VDUP : Vector Duplicate Lane (from scalar to all elements) @@ -4397,9 +4468,6 @@ def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> { def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> { let Inst{19} = lane{0}; } -def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> { - let Inst{19} = lane{0}; -} def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> { let Inst{19-17} = lane{2-0}; } @@ -4409,9 +4477,12 @@ def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> { def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> { let Inst{19} = lane{0}; } -def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> { - let Inst{19} = lane{0}; -} + +def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), + (VDUPLN32d DPR:$Vm, imm:$lane)>; + +def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), + (VDUPLN32q DPR:$Vm, imm:$lane)>; def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -4426,7 +4497,7 @@ def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), - (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src, + (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; @@ -4517,12 +4588,12 @@ class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; -def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>; +def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; -def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>; +def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>; // VREV32 : Vector Reverse elements within 32-bit words @@ -4628,8 +4699,8 @@ def VEXTq32 : VEXTq<"vext", "32", v4i32> { let Inst{9-8} = 0b00; } def VEXTqf : VEXTq<"vext", "32", v4f32> { - let Inst{11} = index{0}; - let Inst{10-8} = 0b000; + let Inst{11-10} = index{1-0}; + let Inst{9-8} = 0b00; } // VTRN : Vector Transpose diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 826ef46..8c542fe 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -27,22 +27,22 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{ }]>; /// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7]. 
-def imm0_7 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 8; +def imm0_7 : ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 8; }]>; def imm0_7_neg : PatLeaf<(i32 imm), [{ return (uint32_t)-N->getZExtValue() < 8; }], imm_neg_XFORM>; -def imm0_255 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 256; +def imm0_255 : ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 256; }]>; def imm0_255_comp : PatLeaf<(i32 imm), [{ return ~((uint32_t)N->getZExtValue()) < 256; }]>; -def imm8_255 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256; +def imm8_255 : ImmLeaf<i32, [{ + return Imm >= 8 && Imm < 256; }]>; def imm8_255_neg : PatLeaf<(i32 imm), [{ unsigned Val = -N->getZExtValue(); @@ -369,6 +369,15 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { let Inst{2-0} = 0b000; } + def tBX_Rm : TI<(outs), (ins pred:$p, GPR:$Rm), IIC_Br, "bx${p}\t$Rm", + [/* for disassembly only */]>, + T1Special<{1,1,0,?}> { + // A6.2.3 & A8.6.25 + bits<4> Rm; + let Inst{6-3} = Rm; + let Inst{2-0} = 0b000; + } + // Alternative return instruction used by vararg functions. def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm), IIC_Br, "bx\t$Rm", @@ -712,6 +721,19 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } +// FIXME: Remove this entry when the above ldr.n workaround is fixed. +// For disassembly use only. +def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [/* disassembly only */]>, + T1Encoding<{0,1,0,0,1,?}> { + // A6.2 & A8.6.59 + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + // A8.6.194 & A8.6.192 defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, t_addrmode_is4, AddrModeT1_4, @@ -1175,10 +1197,18 @@ def tREVSH : // A8.6.136 "revsh", "\t$Rd, $Rm", [(set tGPR:$Rd, (sext_inreg - (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), + (or (srl tGPR:$Rm, (i32 8)), (shl tGPR:$Rm, (i32 8))), i16))]>, Requires<[IsThumb, IsThumb1Only, HasV6]>; +def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), + (shl tGPR:$Rm, (i32 8))), i16), + (tREVSH tGPR:$Rm)>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + // Rotate right register def tROR : // A8.6.139 T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), @@ -1322,10 +1352,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Move between coprocessor and ARM core register -- for disassembly only // -class tMovRCopro<string opc, bit direction> - : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), +class tMovRCopro<string opc, bit direction, dag oops, dag iops> + : T1Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), [/* For disassembly only; pattern left blank */]> { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -1346,8 +1374,12 @@ class tMovRCopro<string opc, bit direction> let Inst{19-16} = CRn; } -def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; -def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; +def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; +def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, + (outs 
GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; class tMovRRCopro<string opc, bit direction> : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), @@ -1420,7 +1452,7 @@ def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br, // from some other function to get here, and we're using the stack frame for the // containing function to save/restore registers, we can't keep anything live in // regs across the eh_sjlj_setjmp(), else it will almost certainly have been -// tromped upon when we get here from a longjmp(). We force everthing out of +// tromped upon when we get here from a longjmp(). We force everything out of // registers except for our own input by listing the relevant registers in // Defs. By doing so, we also cause the prologue/epilogue code to actively // preserve all of the callee-saved registers, which is exactly what we want. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0e01be5..600a121 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -44,7 +44,9 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. -def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> { +def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ + return ARM_AM::getT2SOImmVal(Imm) != -1; + }]> { let EncoderMethod = "getT2SOImmOpValue"; } @@ -61,49 +63,15 @@ def t2_so_imm_neg : Operand<i32>, return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; -// Break t2_so_imm's up into two pieces. This handles immediates with up to 16 -// bits set in them. This uses t2_so_imm2part to match and t2_so_imm2part_[12] -// to get the first/second pieces. -def t2_so_imm2part : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue()); - }]> { -} - -def t2_so_imm2part_1 : SDNodeXForm<imm, [{ - unsigned V = ARM_AM::getT2SOImmTwoPartFirst((unsigned)N->getZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_imm2part_2 : SDNodeXForm<imm, [{ - unsigned V = ARM_AM::getT2SOImmTwoPartSecond((unsigned)N->getZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue()); - }]> { -} - -def t2_so_neg_imm2part_1 : SDNodeXForm<imm, [{ - unsigned V = ARM_AM::getT2SOImmTwoPartFirst(-(int)N->getZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_neg_imm2part_2 : SDNodeXForm<imm, [{ - unsigned V = ARM_AM::getT2SOImmTwoPartSecond(-(int)N->getZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. -def imm1_31 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32; +def imm1_31 : ImmLeaf<i32, [{ + return (int32_t)Imm >= 1 && (int32_t)Imm < 32; }]>; /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
def imm0_4095 : Operand<i32>, - PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 4096; + ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 4096; }]>; def imm0_4095_neg : PatLeaf<(i32 imm), [{ @@ -118,6 +86,11 @@ def imm0_255_not : PatLeaf<(i32 imm), [{ return (uint32_t)(~N->getZExtValue()) < 255; }], imm_comp_XFORM>; +def lo5AllOne : PatLeaf<(i32 imm), [{ + // Returns true if all low 5-bits are 1. + return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL; +}]>; + // Define Thumb2 specific addressing modes. // t2addrmode_imm12 := reg + imm12 @@ -129,6 +102,12 @@ def t2addrmode_imm12 : Operand<i32>, let ParserMatchClass = MemMode5AsmOperand; } +// t2ldrlabel := imm12 +def t2ldrlabel : Operand<i32> { + let EncoderMethod = "getAddrModeImm12OpValue"; +} + + // ADR instruction labels. def t2adrlabel : Operand<i32> { let EncoderMethod = "getT2AdrLabelOpValue"; @@ -173,6 +152,15 @@ def t2addrmode_so_reg : Operand<i32>, let ParserMatchClass = MemMode5AsmOperand; } +// t2addrmode_reg := reg +// Used by load/store exclusive instructions. Useful to enable right assembly +// parsing and printing. Not used for any codegen matching. +// +def t2addrmode_reg : Operand<i32> { + let PrintMethod = "printAddrMode7Operand"; + let MIOperandInfo = (ops tGPR); + let ParserMatchClass = MemMode7AsmOperand; +} //===----------------------------------------------------------------------===// // Multiclass helpers... @@ -700,49 +688,27 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{24-21} = opcod; } } +} // Carry setting variants -let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0> { +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sTwoRegImm< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, - opc, "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{15} = 0; - } + def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>; // register - def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, - opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, - Requires<[IsThumb2]> { + def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + Size4Bytes, IIC_iALUr, + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sTwoRegShiftedReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. 
- } -} + def rs : t2PseudoInst< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + Size4Bytes, IIC_iALUsi, + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>; } } @@ -864,6 +830,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{15-12} = Rt; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm @@ -911,7 +878,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, } // FIXME: Is the pci variant actually needed? - def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii, + def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { let isReMaterializable = 1; @@ -944,6 +911,7 @@ multiclass T2I_st<bits<2> opcod, string opc, let Inst{15-12} = Rt; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm @@ -1398,7 +1366,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn), // for disassembly only. // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, "\t$Rt, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1440,42 +1408,48 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "str", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_iu, - "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", + "str", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "strh", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", + "strh", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "strb", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn], $addr", 
"$Rn = $base_wb", + "strb", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; @@ -1483,7 +1457,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), // only. // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 class T2IstT<bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, "\t$Rt, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1508,20 +1482,20 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants // For disassembly only. -def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2), +def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>; -def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2), +def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>; def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), + (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>; def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), + (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>; // T2Ipl (Preload Data/Instruction) signals the memory system of possible future @@ -1541,6 +1515,7 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let Inst{15-12} = 0b1111; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm12 @@ -1813,10 +1788,8 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; -defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc", - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; +defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", @@ -1847,9 +1820,14 @@ def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
let AddedComplexity = 1 in -def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm), +def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm), + (t2SBCri rGPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm), + (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; +let AddedComplexity = 1 in +def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm), (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm), +def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm), (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -2052,6 +2030,10 @@ defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>; defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; +// (rotr x, (and y, 0x...1f)) ==> (ROR x, y) +def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), + (t2RORrr rGPR:$lhs, rGPR:$rhs)>; + let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "rrx", "\t$Rd, $Rm", @@ -2140,10 +2122,12 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$Rd, $imm", [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; + let Inst{5} = 0; // should be 0. bits<10> imm; let msb{4-0} = imm{9-5}; @@ -2176,9 +2160,11 @@ let Constraints = "$src = $Rd" in { [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{15} = 0; + let Inst{5} = 0; // should be 0. bits<10> imm; let msb{4-0} = imm{9-5}; @@ -2193,9 +2179,11 @@ let Constraints = "$src = $Rd" in { IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width", []> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{15} = 0; + let Inst{5} = 0; // should be 0. 
bits<5> lsbit; bits<5> width; @@ -2607,9 +2595,15 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "revsh", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (sext_inreg - (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), + (or (srl rGPR:$Rm, (i32 8)), (shl rGPR:$Rm, (i32 8))), i16))]>; +def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), + (shl rGPR:$Rm, (i32 8))), i16), + (t2REVSH rGPR:$Rm)>; + +def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>; + def t2PKHBT : T2ThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", @@ -2843,9 +2837,9 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{5-4} = opcod; let Inst{3-0} = 0b1111; - bits<4> Rn; + bits<4> addr; bits<4> Rt; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, @@ -2859,37 +2853,37 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{5-4} = opcod; bits<4> Rd; - bits<4> Rn; + bits<4> addr; bits<4> Rt; - let Inst{11-8} = Rd; - let Inst{19-16} = Rn; + let Inst{3-0} = Rd; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]", +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, $addr", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]", +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, $addr", "", []>; -def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, +def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "ldrex", "\t$Rt, [$Rn]", "", + "ldrex", "\t$Rt, $addr", "", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000101; let Inst{11-8} = 0b1111; let Inst{7-0} = 0b00000000; // imm8 = 0 - bits<4> Rn; bits<4> Rt; - let Inst{19-16} = Rn; + bits<4> addr; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } -def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn), +def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "ldrexd", "\t$Rt, $Rt2, [$Rn]", "", + "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -2897,31 +2891,31 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn), } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strexb", "\t$Rd, $Rt, [$Rn]", "", []>; -def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strexh", "\t$Rd, $Rt, [$Rn]", "", []>; -def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strex", "\t$Rd, $Rt, [$Rn]", "", - []> { +def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexb", "\t$Rd, $Rt, $addr", "", []>; +def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), 
+ AddrModeNone, Size4Bytes, NoItinerary, + "strexh", "\t$Rd, $Rt, $addr", "", []>; +def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), + AddrModeNone, Size4Bytes, NoItinerary, + "strex", "\t$Rd, $Rt, $addr", "", + []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000100; let Inst{7-0} = 0b00000000; // imm8 = 0 bits<4> Rd; - bits<4> Rn; + bits<4> addr; bits<4> Rt; let Inst{11-8} = Rd; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), - (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [], + "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -2965,7 +2959,7 @@ let isCall = 1, // here, and we're using the stack frame for the containing function to // save/restore registers, we can't keep anything live in regs across // the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon -// when we get here from a longjmp(). We force everthing out of registers +// when we get here from a longjmp(). We force everything out of registers // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved registers, which is exactly what we want. @@ -3238,19 +3232,20 @@ class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, bits<4> Rn; let Inst{19-16} = Rn; + let Inst{15-0} = 0xc000; } def t2RFEDBW : T2RFE<0b111010000011, - (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!", + (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!", [/* For disassembly only; pattern left blank */]>; def t2RFEDB : T2RFE<0b111010000001, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeab", "\t$Rn", + (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn", [/* For disassembly only; pattern left blank */]>; def t2RFEIAW : T2RFE<0b111010011011, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!", + (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!", [/* For disassembly only; pattern left blank */]>; def t2RFEIA : T2RFE<0b111010011001, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn", + (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn", [/* For disassembly only; pattern left blank */]>; //===----------------------------------------------------------------------===// @@ -3352,10 +3347,8 @@ def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */, // Move between coprocessor and ARM core register -- for disassembly only // -class t2MovRCopro<string opc, bit direction> - : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), +class t2MovRCopro<string opc, bit direction, dag oops, dag iops> + : T2Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), [/* For disassembly only; pattern left blank */]> { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -3376,8 +3369,12 @@ class t2MovRCopro<string opc, bit direction> let Inst{19-16} = CRn; } -def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>; -def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>; +def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; +def
t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; class t2MovRRCopro<string opc, bit direction> : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 2990283..376bd96 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -101,14 +101,6 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, let Inst{21} = 1; // Writeback let Inst{20} = L_bit; } - def DDB : - AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), - IndexModeNone, itin, - !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { - let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - } def DDB_UPD : AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), IndexModeUpd, itin_upd, @@ -143,18 +135,6 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, // VFP pipelines. let D = VFPNeonDomain; } - def SDB : - AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), - IndexModeNone, itin, - !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { - let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - - // Some single precision VFP instructions may be executed on both NEON and - // VFP pipelines. - let D = VFPNeonDomain; - } def SDB_UPD : AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), IndexModeUpd, itin_upd, @@ -467,6 +447,10 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, let Inst{6-5} = 0b00; let Inst{3-0} = 0b0000; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } def VMOVSR : AVConv4I<0b11100000, 0b1010, @@ -484,6 +468,10 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, let Inst{6-5} = 0b00; let Inst{3-0} = 0b0000; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } let neverHasSideEffects = 1 in { @@ -503,6 +491,10 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, let Inst{19-16} = Rt2; let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, @@ -510,6 +502,10 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } } // neverHasSideEffects @@ -532,6 +528,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, let Inst{19-16} = Rt2; let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } let neverHasSideEffects = 1 in @@ -540,6 +540,10 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. 
+ let D = VFPNeonDomain; } // FMRDH: SPR -> GPR @@ -972,33 +976,15 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), // let neverHasSideEffects = 1 in { -def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", +def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), + Size4Bytes, IIC_fpUNA64, [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, RegConstraint<"$Dn = $Dd">; -def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", +def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), + Size4Bytes, IIC_fpUNA32, [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, RegConstraint<"$Sn = $Sd">; - -def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", - [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, - RegConstraint<"$Dn = $Dd">; - -def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", - [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, - RegConstraint<"$Sn = $Sd"> { - // Some single precision VFP instructions may be executed on both NEON and - // VFP pipelines on A8. - let D = VFPNeonA8Domain; -} } // neverHasSideEffects //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index d9dc5cd..df89fad 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -79,7 +79,7 @@ namespace { unsigned Position; MachineBasicBlock::iterator MBBI; bool Merged; - MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, + MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; @@ -174,7 +174,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA; - case ARM_AM::db: return ARM::VLDMSDB; + case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. } break; case ARM::VSTRS: @@ -182,7 +182,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA; - case ARM_AM::db: return ARM::VSTMSDB; + case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. } break; case ARM::VLDRD: @@ -190,7 +190,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA; - case ARM_AM::db: return ARM::VLDMDDB; + case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. } break; case ARM::VSTRD: @@ -198,7 +198,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA; - case ARM_AM::db: return ARM::VSTMDDB; + case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 
} break; } @@ -246,13 +246,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::t2LDMDB_UPD: case ARM::t2STMDB: case ARM::t2STMDB_UPD: - case ARM::VLDMSDB: case ARM::VLDMSDB_UPD: - case ARM::VSTMSDB: case ARM::VSTMSDB_UPD: - case ARM::VLDMDDB: case ARM::VLDMDDB_UPD: - case ARM::VSTMDDB: case ARM::VSTMDDB_UPD: return ARM_AM::db; @@ -312,6 +308,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; else if (Offset != 0) { + // Check if this is a supported opcode before we insert instructions to + // calculate a new base register. + if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; + // If starting offset isn't zero, insert a MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. @@ -354,6 +354,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); + if (!Opcode) return false; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); @@ -453,6 +454,25 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PRegNum = PMO.isUndef() ? UINT_MAX : getARMRegisterNumbering(PReg); unsigned Count = 1; + unsigned Limit = ~0U; + + // vldm / vstm limits are 32 for S variants, 16 for D variants. + + switch (Opcode) { + default: break; + case ARM::VSTRS: + Limit = 32; + break; + case ARM::VSTRD: + Limit = 16; + break; + case ARM::VLDRD: + Limit = 16; + break; + case ARM::VLDRS: + Limit = 32; + break; + } for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { int NewOffset = MemOps[i].Offset; @@ -460,13 +480,13 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Reg = MO.getReg(); unsigned RegNum = MO.isUndef() ? UINT_MAX : getARMRegisterNumbering(Reg); - // Register numbers must be in ascending order. For VFP, the registers - // must also be consecutive and there is a limit of 16 double-word - // registers per instruction. + // Register numbers must be in ascending order. For VFP / NEON load and + // store multiples, the registers must also be consecutive and within the + // limit on the number of registers per instruction.
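// (Illustrative example: with Limit = 16 for the D-register variants, a run
// like {d0, d1, d2} at consecutive offsets can still merge into one
// vldm / vstm, but {d0, d2} cannot, since d2 is not d1 + 1, and a run longer
// than 16 D registers must be split. The checks follow below.)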
if (Reg != ARM::SP && NewOffset == Offset + (int)Size && - ((isNotVFP && RegNum > PRegNum) - || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { + ((isNotVFP && RegNum > PRegNum) || + ((Count < Limit) && RegNum == PRegNum+1))) { Offset += Size; PRegNum = RegNum; ++Count; @@ -567,14 +587,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::t2STMIA: case ARM::t2STMDB: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VSTMSIA: - case ARM::VSTMSDB: return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VSTMDIA: - case ARM::VSTMDDB: return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; } } @@ -624,7 +640,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VLDMSIA: - case ARM::VLDMSDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA_UPD; @@ -632,7 +647,6 @@ case ARM::VLDMDIA: - case ARM::VLDMDDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA_UPD; @@ -640,7 +654,6 @@ break; case ARM::VSTMSIA: - case ARM::VSTMSDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA_UPD; @@ -648,7 +661,6 @@ break; case ARM::VSTMDIA: - case ARM::VSTMDDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA_UPD; @@ -749,7 +761,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MIB.addOperand(MI->getOperand(OpNum)); // Transfer memoperands. - (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); MBB.erase(MBBI); return true; @@ -1275,14 +1287,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, CurrPred, CurrPredReg, Scratch, MemOps, Merges); - // Try folding preceeding/trailing base inc/dec into the generated + // Try folding preceding/trailing base inc/dec into the generated // LDM/STM ops. for (unsigned i = 0, e = Merges.size(); i < e; ++i) if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) ++NumMerges; NumMerges += Merges.size(); - // Try folding preceeding/trailing base inc/dec into those load/store + // Try folding preceding/trailing base inc/dec into those load/store // that were not merged to form LDM/STM ops. for (unsigned i = 0; i != NumMemOps; ++i) if (!MemOps[i].Merged) @@ -1292,7 +1304,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // RS may be pointing to an instruction that's deleted. RS->skipTo(prior(MBBI)); } else if (NumMemOps == 1) { - // Try folding preceeding/trailing base inc/dec into the single + // Try folding preceding/trailing base inc/dec into the single // load/store. if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { ++NumMerges; @@ -1322,7 +1334,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops -/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it +/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it /// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr} /// bx lr @@ -1530,15 +1542,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // Then make sure the immediate offset fits. int OffImm = getMemoryOpOffset(Op0); if (isT2) { - if (OffImm < 0) { - if (OffImm < -255) - // Can't fall back to t2LDRi8 / t2STRi8. - return false; - } else { - int Limit = (1 << 8) * Scale; - if (OffImm >= Limit || (OffImm & (Scale-1))) - return false; - } + int Limit = (1 << 8) * Scale; + if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) + return false; Offset = OffImm; } else { ARM_AM::AddrOpc AddSub = ARM_AM::add; diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/ARMMCAsmInfo.cpp index 53edfca..a3f89e9 100644 --- a/lib/Target/ARM/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/ARMMCAsmInfo.cpp @@ -12,8 +12,16 @@ //===----------------------------------------------------------------------===// #include "ARMMCAsmInfo.h" +#include "llvm/Support/CommandLine.h" + using namespace llvm; +cl::opt<bool> +EnableARMEHABI("arm-enable-ehabi", cl::Hidden, + cl::desc("Generate ARM EHABI tables"), + cl::init(false)); + + static const char *const arm_asm_table[] = { "{r0}", "r0", "{r1}", "r1", @@ -65,4 +73,8 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { DwarfRequiresFrameSection = false; SupportsDebugInformation = true; + + // Exceptions handling + if (EnableARMEHABI) + ExceptionsType = ExceptionHandling::ARM; } diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp index 6d7b485..10607b1 100644 --- a/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -278,6 +278,15 @@ public: unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned NEONThumb2DataIPostEncoder(const MCInst &MI, unsigned EncodedValue) const; unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI, @@ -1201,6 +1210,30 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, return MO.getReg(); } +unsigned ARMMCCodeEmitter:: +getShiftRight8Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 8 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight16Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 16 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight32Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 32 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight64Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 64 - MI.getOperand(Op).getImm(); +} + void ARMMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { diff --git a/lib/Target/ARM/ARMMCExpr.h b/lib/Target/ARM/ARMMCExpr.h index d42f766..0a2e883 100644 --- a/lib/Target/ARM/ARMMCExpr.h +++ b/lib/Target/ARM/ARMMCExpr.h @@ -60,6 +60,9 @@ public: bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const; void AddValueSymbols(MCAssembler *) const; + const MCSection 
*FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index ad51bc1..1cba1ba 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -12,26 +12,8 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" -#include "ARMInstrInfo.h" -#include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" -#include "ARMSubtarget.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii, diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 22d15b5..54bf82a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -70,6 +70,8 @@ def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>; def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>; def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>; def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>; +// These require 32-bit instructions. +let CostPerUse = 1 in { def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>; def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>; def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>; @@ -78,6 +80,7 @@ def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>; def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>; def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>; def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>; +} // Float registers def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">; @@ -99,33 +102,41 @@ def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; // Aliases of the F* registers used to hold 64-bit fp values (doubles) let SubRegIndices = [ssub_0, ssub_1] in { -def D0 : ARMReg< 0, "d0", [S0, S1]>; -def D1 : ARMReg< 1, "d1", [S2, S3]>; -def D2 : ARMReg< 2, "d2", [S4, S5]>; -def D3 : ARMReg< 3, "d3", [S6, S7]>; -def D4 : ARMReg< 4, "d4", [S8, S9]>; -def D5 : ARMReg< 5, "d5", [S10, S11]>; -def D6 : ARMReg< 6, "d6", [S12, S13]>; -def D7 : ARMReg< 7, "d7", [S14, S15]>; -def D8 : ARMReg< 8, "d8", [S16, S17]>; -def D9 : ARMReg< 9, "d9", [S18, S19]>; -def D10 : ARMReg<10, "d10", [S20, S21]>; -def D11 : ARMReg<11, "d11", [S22, S23]>; -def D12 : ARMReg<12, "d12", [S24, S25]>; -def D13 : ARMReg<13, "d13", [S26, S27]>; -def D14 : ARMReg<14, "d14", [S28, S29]>; -def D15 : ARMReg<15, "d15", [S30, S31]>; +def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>; +def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>; +def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>; +def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>; +def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>; +def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>; +def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>; +def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>; +def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>; 
+def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>; +def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>; +def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>; +def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>; +def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>; +def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>; +def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>; } // VFP3 defines 16 additional double registers -def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">; -def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">; -def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">; -def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">; -def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">; -def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">; -def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; -def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; +def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; +def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>; +def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>; +def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>; +def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>; +def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>; +def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; +def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>; +def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>; +def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>; +def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>; +def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>; +def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>; +def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>; +def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>; +def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>; // Advanced SIMD (NEON) defines 16 quad-word aliases let SubRegIndices = [dsub_0, dsub_1], diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 82c6735..49fedf6 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -656,19 +656,19 @@ def CortexA9Itineraries : ProcessorItineraries< [1, 1, 1]>, // // Single-precision to Integer Move + // + // On A9 move-from-VFP is free to issue with no stall if other VFP + // operations are in flight. I assume it still can't dual-issue though. InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, - InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_DRegsVFP], 0, Required>, - InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_NPipe]>], + InstrStage<1, [A9_MUX0], 0>], [2, 1]>, // // Double-precision to Integer Move + // + // On A9 move-from-VFP is free to issue with no stall if other VFP + // operations are in flight. I assume it still can't dual-issue though. InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, - InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_DRegsVFP], 0, Required>, - InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_NPipe]>], + InstrStage<1, [A9_MUX0], 0>], [2, 1, 1]>, // // Single-precision FP Load @@ -691,20 +691,22 @@ def CortexA9Itineraries : ProcessorItineraries< [2, 1]>, // // FP Load Multiple + // FIXME: assumes 2 doubles which requires 2 LS cycles. 
InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, // // FP Load Multiple + update + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -725,205 +727,206 @@ def CortexA9Itineraries : ProcessorItineraries< [1, 1]>, // // FP Store Multiple + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, // // FP Store Multiple + update + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, // NEON // VLD1 - // FIXME: Conservatively assume insufficent alignment. InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1]>, // VLD1x2 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1, 1]>, // VLD1x3 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 1]>, // VLD1x4 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 3, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 2, 1]>, // VLD1u InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - 
InstrStage<2, [A9_LSUnit]>], - [2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 2, 1]>, // VLD1x2u InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1, 2, 1]>, // VLD1x3u InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 2, 1]>, // VLD1x4u InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 3, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 2, 2, 1]>, // // VLD1ln InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 1, 1, 1]>, // // VLD1lnu InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 2, 1, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 2, 1, 1, 1, 1]>, // // VLD1dup InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 1]>, // // VLD1dupu InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 2, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1, 1]>, // // VLD2 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1]>, // // VLD2x2 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, 
InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 4, 3, 4, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 3, 2, 3, 1]>, // // VLD2ln InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 4, 1, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 3, 1, 1, 1, 1]>, // // VLD2u InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 2, 1, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 2, 1, 1, 1]>, // // VLD2x2u InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 4, 3, 4, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 3, 2, 3, 2, 1]>, // // VLD2lnu InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 4, 2, 1, 1, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 3, 2, 1, 1, 1, 1, 1]>, // // VLD2dup InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1]>, // // VLD2dupu InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 2, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 2, 1, 1]>, // // VLD3 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 1]>, // // VLD3ln InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -938,10 +941,10 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, 
Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 2, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 2, 1]>, // // VLD3lnu InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -974,108 +977,108 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 5, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 4, 1]>, // // VLD4ln InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<11,[A9_DRegsVFP], 0, Reserved>, - InstrStage<5, [A9_NPipe], 0>, - InstrStage<5, [A9_LSUnit]>], - [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>, + InstrStage<10,[A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe], 0>, + InstrStage<4, [A9_LSUnit]>], + [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, // // VLD4u InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 5, 2, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 4, 2, 1]>, // // VLD4lnu InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<11,[A9_DRegsVFP], 0, Reserved>, - InstrStage<5, [A9_NPipe], 0>, - InstrStage<5, [A9_LSUnit]>], - [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>, + InstrStage<10,[A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe], 0>, + InstrStage<4, [A9_LSUnit]>], + [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, // // VLD4dup InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 3, 4, 4, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 2, 3, 3, 1]>, // // VLD4dupu InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 3, 4, 4, 2, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 2, 3, 3, 2, 1, 1]>, // // VST1 InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, // // VST1x2 InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + 
InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, // // VST1x3 InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2]>, // // VST1x4 InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST1u InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1]>, // // VST1x2u InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST1x3u @@ -1083,44 +1086,44 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2]>, // // VST1x4u InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // // VST1ln InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, // // VST1lnu InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1]>, // // VST2 InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 
1, 1, 1]>, // // VST2x2 @@ -1136,9 +1139,9 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST2x2u @@ -1154,36 +1157,36 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, // // VST2lnu InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST3 InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2]>, // // VST3u InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2]>, // // VST3ln @@ -1208,36 +1211,36 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST4u InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // // VST4ln InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST4lnu InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, 
[A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 2b9202b..aa1e398 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -35,7 +35,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, // This requires 4-byte alignment. if ((Align & 3) != 0) return SDValue(); - // This requires the copy size to be a constant, preferrably + // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 1465984..c6f266b 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -38,6 +38,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , ARMFPUType(None) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) + , HasVMLxForwarding(false) , SlowFPBrcc(false) , IsThumb(isT) , ThumbMode(Thumb1) @@ -51,6 +52,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) + , AvoidCPSRPartialUpdate(false) , HasMPExtension(false) , FPOnlySP(false) , AllowsUnalignedMem(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 76c1c3f..0271c87 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -61,6 +61,10 @@ protected: /// whether the FP VML[AS] instructions are slow (if so, don't use them). bool SlowFPVMLx; + /// HasVMLxForwarding - If true, NEON has special multiplier accumulator + /// forwarding to allow mul + mla to be issued back to back. + bool HasVMLxForwarding; + /// SlowFPBrcc - True if floating point compare + branch is slow. bool SlowFPBrcc; @@ -106,6 +110,11 @@ protected: /// over 16-bit ones. bool Pref32BitThumb; + /// AvoidCPSRPartialUpdate - If true, codegen should avoid using instructions + /// that partially update CPSR and add a false dependency on the previous + /// CPSR-setting instruction. + bool AvoidCPSRPartialUpdate; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only).
bool HasMPExtension; @@ -182,15 +191,19 @@ protected: bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } bool useFPVMLx() const { return !SlowFPVMLx; } + bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } + bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool hasMPExtension() const { return HasMPExtension; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } - bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + const Triple &getTargetTriple() const { return TargetTriple; } + + bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetELF() const { return !isTargetDarwin(); } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 0ee773b..29aa4f7 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -22,16 +22,13 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static cl::opt<bool>ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden); - static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: + + if (TheTriple.isOSDarwin()) return new ARMMCAsmInfoDarwin(); - default: - return new ARMELFMCAsmInfo(); - } + + return new ARMELFMCAsmInfo(); } // This is duplicated code. Refactor this. @@ -41,17 +38,17 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: + + if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); return NULL; - default: - return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); } + + return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); } extern "C" void LLVMInitializeARMTarget() { @@ -86,8 +83,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, isThumb), JITInfo(), - InstrItins(Subtarget.getInstrItineraryData()) -{ + InstrItins(Subtarget.getInstrItineraryData()) { DefRelocModel = getRelocationModel(); } @@ -149,8 +145,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
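As an aside, a minimal hypothetical sketch (not code from this import) of how a codegen heuristic might consult the two subtarget hooks added above; hasVMLxForwarding(), useFPVMLx() and avoidCPSRPartialUpdate() are the real accessors shown in the hunks, while the helper names and the policy around them are assumptions:

    // Illustrative only: the accessors are real, the helpers are not.
    static bool keepFusedVMLA(const ARMSubtarget &ST) {
      // A fused multiply-accumulate stays attractive when the core forwards
      // the multiplier result into the accumulator and VMLx is not slow.
      return ST.hasVMLxForwarding() && ST.useFPVMLx();
    }
    static bool preferFullCPSRDef(const ARMSubtarget &ST) {
      // When partial CPSR updates are costly, steer selection toward
      // encodings that define the full CPSR.
      return ST.avoidCPSRPartialUpdate();
    }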
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); - if (ExpandMLx && - OptLevel != CodeGenOpt::None && Subtarget.hasVFP2()) + if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9()) PM.add(createMLxExpansionPass()); return true; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 7535da5..19defa1 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -36,8 +36,9 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); + LSDASection = NULL; } - + AttributesSection = getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 129af20..29ecc18 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -29,15 +29,6 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -/// Shift types used for register controlled shifts in ARM memory addressing. -enum ShiftType { - Lsl, - Lsr, - Asr, - Ror, - Rrx -}; - namespace { class ARMOperand; @@ -55,8 +46,10 @@ class ARMAsmParser : public TargetAsmParser { int TryParseRegister(); virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); + bool TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); - bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &); + bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &, + ARMII::AddrMode AddrMode); bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); bool ParsePrefix(ARMMCExpr::VariantKind &RefKind); const MCExpr *ApplyPrefixToExpr(const MCExpr *E, @@ -65,13 +58,14 @@ class ARMAsmParser : public TargetAsmParser { bool ParseMemoryOffsetReg(bool &Negative, bool &OffsetRegShifted, - enum ShiftType &ShiftType, + enum ARM_AM::ShiftOpc &ShiftType, const MCExpr *&ShiftAmount, const MCExpr *&Offset, bool &OffsetIsReg, int &OffsetRegNum, SMLoc &E); - bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); + bool ParseShift(enum ARM_AM::ShiftOpc &St, + const MCExpr *&ShiftAmount, SMLoc &E); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveThumb(SMLoc L); bool ParseDirectiveThumbFunc(SMLoc L); @@ -102,10 +96,25 @@ class ARMAsmParser : public TargetAsmParser { SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy tryParseMSRMaskOperand( SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy tryParseMemMode2Operand( + SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy tryParseMemMode3Operand( + SmallVectorImpl<MCParsedAsmOperand*>&); + + // Asm Match Converter Methods + bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); public: ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) : TargetAsmParser(T), Parser(_Parser), TM(_TM) { + MCAsmParserExtension::Initialize(_Parser); // 
Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures( &TM.getSubtarget<ARMSubtarget>())); @@ -136,6 +145,7 @@ class ARMOperand : public MCParsedAsmOperand { RegisterList, DPRRegisterList, SPRRegisterList, + Shifter, Token } Kind; @@ -178,13 +188,14 @@ class ARMOperand : public MCParsedAsmOperand { /// Combined record for all forms of ARM address expressions. struct { + ARMII::AddrMode AddrMode; unsigned BaseRegNum; union { unsigned RegNum; ///< Offset register num, when OffsetIsReg. const MCExpr *Value; ///< Offset value, when !OffsetIsReg. } Offset; const MCExpr *ShiftAmount; // used when OffsetRegShifted is true - enum ShiftType ShiftType; // used when OffsetRegShifted is true + enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true unsigned Preindexed : 1; unsigned Postindexed : 1; @@ -192,6 +203,11 @@ class ARMOperand : public MCParsedAsmOperand { unsigned Negative : 1; // only used when OffsetIsReg is true unsigned Writeback : 1; } Mem; + + struct { + ARM_AM::ShiftOpc ShiftTy; + unsigned RegNum; + } Shift; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -234,6 +250,10 @@ public: break; case ProcIFlags: IFlags = o.IFlags; + break; + case Shifter: + Shift = o.Shift; + break; } } @@ -290,7 +310,9 @@ public: /// @name Memory Operand Accessors /// @{ - + ARMII::AddrMode getMemAddrMode() const { + return Mem.AddrMode; + } unsigned getMemBaseRegNum() const { return Mem.BaseRegNum; } @@ -310,7 +332,7 @@ public: assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); return Mem.ShiftAmount; } - enum ShiftType getMemShiftType() const { + enum ARM_AM::ShiftOpc getMemShiftType() const { assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); return Mem.ShiftType; } @@ -334,6 +356,52 @@ public: bool isToken() const { return Kind == Token; } bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; } bool isMemory() const { return Kind == Memory; } + bool isShifter() const { return Kind == Shifter; } + bool isMemMode2() const { + if (getMemAddrMode() != ARMII::AddrMode2) + return false; + + if (getMemOffsetIsReg()) + return true; + + if (getMemNegative() && + !(getMemPostindexed() || getMemPreindexed())) + return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!CE) return false; + int64_t Value = CE->getValue(); + + // The offset must be in the range 0-4095 (imm12). + if (Value > 4095 || Value < -4095) + return false; + + return true; + } + bool isMemMode3() const { + if (getMemAddrMode() != ARMII::AddrMode3) + return false; + + if (getMemOffsetIsReg()) { + if (getMemOffsetRegShifted()) + return false; // No shift with offset reg allowed + return true; + } + + if (getMemNegative() && + !(getMemPostindexed() || getMemPreindexed())) + return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!CE) return false; + int64_t Value = CE->getValue(); + + // The offset must be in the range 0-255 (imm8). 
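An aside before the check continues below: the two offset-legality rules that isMemMode2 and isMemMode3 encode can be summarized in a standalone sketch (helper names are assumptions; the sign is not part of the immediate but is carried separately as an add/sub bit when the operand is encoded):

    // AM2 carries a 12-bit offset magnitude, AM3 an 8-bit one.
    static bool fitsAM2Offset(int64_t V) { return V >= -4095 && V <= 4095; }
    static bool fitsAM3Offset(int64_t V) { return V >= -255 && V <= 255; }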
+ if (Value > 255 || Value < -255) + return false; + + return true; + } bool isMemMode5() const { if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() || getMemNegative()) @@ -346,6 +414,23 @@ public: int64_t Value = CE->getValue(); return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); } + bool isMemMode7() const { + if (!isMemory() || + getMemPreindexed() || + getMemPostindexed() || + getMemOffsetIsReg() || + getMemNegative() || + getMemWriteback()) + return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!CE) return false; + + if (CE->getValue()) + return false; + + return true; + } bool isMemModeRegThumb() const { if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback()) return false; @@ -402,6 +487,12 @@ public: Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addShifterOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm( + ARM_AM::getSORegOpc(Shift.ShiftTy, 0))); + } + void addRegListOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const SmallVectorImpl<unsigned> &RegList = getRegList(); @@ -428,6 +519,88 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); } + void addMemMode7Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && isMemMode7() && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + (void)CE; + assert((CE && CE->getValue() == 0) && + "No offset operand support in mode 7"); + } + + void addMemMode2Operands(MCInst &Inst, unsigned N) const { + assert(isMemMode2() && "Invalid mode or number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + + if (getMemOffsetIsReg()) { + Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + + ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add; + ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift; + int64_t ShiftAmount = 0; + + if (getMemOffsetRegShifted()) { + ShOpc = getMemShiftType(); + const MCConstantExpr *CE = + dyn_cast<MCConstantExpr>(getMemShiftAmount()); + ShiftAmount = CE->getValue(); + } + + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount, + ShOpc, IdxMode))); + return; + } + + // Create an operand placeholder to always yield the same number of operands. + Inst.addOperand(MCOperand::CreateReg(0)); + + // FIXME: #-0 is encoded differently than #0. Does the parser preserve + // the difference? + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + assert(CE && "Non-constant mode 2 offset operand!"); + int64_t Offset = CE->getValue(); + + if (Offset >= 0) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add, + Offset, ARM_AM::no_shift, IdxMode))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub, + -Offset, ARM_AM::no_shift, IdxMode))); + } + + void addMemMode3Operands(MCInst &Inst, unsigned N) const { + assert(isMemMode3() && "Invalid mode or number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + + if (getMemOffsetIsReg()) { + Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + + ARM_AM::AddrOpc AMOpc = getMemNegative() ?
ARM_AM::sub : ARM_AM::add; + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0, + IdxMode))); + return; + } + + // Create an operand placeholder to always yield the same number of operands. + Inst.addOperand(MCOperand::CreateReg(0)); + + // FIXME: #-0 is encoded differently than #0. Does the parser preserve + // the difference? + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + assert(CE && "Non-constant mode 3 offset operand!"); + int64_t Offset = CE->getValue(); + + if (Offset >= 0) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add, + Offset, IdxMode))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub, + -Offset, IdxMode))); + } + void addMemMode5Operands(MCInst &Inst, unsigned N) const { assert(N == 2 && isMemMode5() && "Invalid number of operands!"); @@ -525,6 +698,15 @@ public: return Op; } + static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(Shifter); + Op->Shift.ShiftTy = ShTy; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand * CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs, SMLoc StartLoc, SMLoc EndLoc) { @@ -553,9 +735,10 @@ public: return Op; } - static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, - const MCExpr *Offset, int OffsetRegNum, - bool OffsetRegShifted, enum ShiftType ShiftType, + static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum, + bool OffsetIsReg, const MCExpr *Offset, + int OffsetRegNum, bool OffsetRegShifted, + enum ARM_AM::ShiftOpc ShiftType, const MCExpr *ShiftAmount, bool Preindexed, bool Postindexed, bool Negative, bool Writeback, SMLoc S, SMLoc E) { @@ -571,6 +754,7 @@ public: "Cannot have expression offset and register offset!"); ARMOperand *Op = new ARMOperand(Memory); + Op->Mem.AddrMode = AddrMode; Op->Mem.BaseRegNum = BaseRegNum; Op->Mem.OffsetIsReg = OffsetIsReg; if (OffsetIsReg) @@ -642,7 +826,8 @@ void ARMOperand::dump(raw_ostream &OS) const { break; case Memory: OS << "<memory " - << "base:" << getMemBaseRegNum(); + << "am:" << ARMII::AddrModeToString(getMemAddrMode()) + << " base:" << getMemBaseRegNum(); if (getMemOffsetIsReg()) { OS << " offset:<register " << getMemOffsetRegNum(); if (getMemOffsetRegShifted()) { @@ -676,6 +861,9 @@ void ARMOperand::dump(raw_ostream &OS) const { case Register: OS << "<register " << getReg() << ">"; break; + case Shifter: + OS << "<shifter " << getShiftOpcStr(Shift.ShiftTy) << ">"; + break; case RegisterList: case DPRRegisterList: case SPRRegisterList: { @@ -738,6 +926,42 @@ int ARMAsmParser::TryParseRegister() { return RegNum; } +/// Try to parse a register-controlled shift. The token must be an Identifier +/// naming a shift type (lsl, lsr, asr, ror, rrx) when called; on a match the +/// shifted register and the shift operand are added to Operands and false is +/// returned. Otherwise return true.
+/// +bool ARMAsmParser::TryParseShiftRegister( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + + std::string upperCase = Tok.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase) + .Case("lsl", ARM_AM::lsl) + .Case("lsr", ARM_AM::lsr) + .Case("asr", ARM_AM::asr) + .Case("ror", ARM_AM::ror) + .Case("rrx", ARM_AM::rrx) + .Default(ARM_AM::no_shift); + + if (ShiftTy == ARM_AM::no_shift) + return true; + + Parser.Lex(); // Eat the shift-type token. + int RegNum = TryParseRegister(); + if (RegNum == -1) + return Error(Parser.getTok().getLoc(), "register expected"); + + Operands.push_back(ARMOperand::CreateReg(RegNum,S, Parser.getTok().getLoc())); + Operands.push_back(ARMOperand::CreateShifter(ShiftTy, + S, Parser.getTok().getLoc())); + + return false; +} + + /// Try to parse a register name. The token must be an Identifier when called. /// If it's a register, an AsmOperand is created. Another AsmOperand is created /// if there is a "writeback". 'true' if it's not a register. @@ -1046,13 +1270,96 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } +/// tryParseMemMode2Operand - Try to parse a memory addressing mode 2 operand. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"") + + if (ParseMemory(Operands, ARMII::AddrMode2)) + return MatchOperand_NoMatch; + + return MatchOperand_Success; +} + +/// tryParseMemMode3Operand - Try to parse a memory addressing mode 3 operand. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"") + + if (ParseMemory(Operands, ARMII::AddrMode3)) + return MatchOperand_NoMatch; + + return MatchOperand_Success; +} + +/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't properly handle tied operands +/// when they refer to multiple MIOperands inside a single one. +bool ARMAsmParser:: +CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + + ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't properly handle tied operands +/// when they refer to multiple MIOperands inside a single one. +bool ARMAsmParser:: +CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands +/// when they refer to multiple MIOperands inside a single one. +bool ARMAsmParser:: +CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + + ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't properly handle tied operands +/// when they refer to multiple MIOperands inside a single one. +bool ARMAsmParser:: +CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. /// /// TODO Only preindexing and postindexing addressing are started, unindexed /// with option, etc are still to do. bool ARMAsmParser:: -ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + ARMII::AddrMode AddrMode = ARMII::AddrModeNone) { SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -1083,7 +1390,7 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ARMOperand *WBOp = 0; int OffsetRegNum = -1; bool OffsetRegShifted = false; - enum ShiftType ShiftType = Lsl; + enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl; const MCExpr *ShiftAmount = 0; const MCExpr *Offset = 0; @@ -1106,10 +1413,17 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { + // No addrmode3 instruction uses "!" + if (AddrMode == ARMII::AddrMode3) + return true; + WBOp = ARMOperand::CreateToken(ExclaimTok.getString(), ExclaimTok.getLoc()); Writeback = true; Parser.Lex(); // Eat exclaim token + } else { // In addressing mode 2, pre-indexed mode always ends with "!" + if (AddrMode == ARMII::AddrMode2) + Preindexed = false; } } else { // The "[Rn" we have so far was not followed by a comma. @@ -1143,13 +1457,17 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!OffsetIsReg) { if (!Offset) Offset = MCConstantExpr::Create(0, getContext()); + } else { + if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) { + Error(E, "shift amount not supported"); + return true; + } } - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, - OffsetRegNum, OffsetRegShifted, - ShiftType, ShiftAmount, Preindexed, - Postindexed, Negative, Writeback, - S, E)); + Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg, + Offset, OffsetRegNum, OffsetRegShifted, + ShiftType, ShiftAmount, Preindexed, + Postindexed, Negative, Writeback, S, E)); if (WBOp) Operands.push_back(WBOp); @@ -1165,7 +1483,7 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// we return false on success or an error otherwise.
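For context, the mode-specific syntax restrictions ParseMemory now enforces, condensed into a sketch (hypothetical helper mirroring the checks above, not code from the import):

    // AddrMode3 allows neither register-shifted offsets nor the "!" writeback
    // marker; AddrMode2 downgrades a pre-indexed form without "!" to plain
    // offset mode instead of rejecting it.
    static bool rejectMemSyntax(ARMII::AddrMode AM, bool Shifted, bool Bang) {
      return AM == ARMII::AddrMode3 && (Shifted || Bang);
    }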
bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, bool &OffsetRegShifted, - enum ShiftType &ShiftType, + enum ARM_AM::ShiftOpc &ShiftType, const MCExpr *&ShiftAmount, const MCExpr *&Offset, bool &OffsetIsReg, @@ -1226,28 +1544,28 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, /// ( lsl | lsr | asr | ror ) , # shift_amount /// rrx /// and returns true if it parses a shift otherwise it returns false. -bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, - SMLoc &E) { +bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St, + const MCExpr *&ShiftAmount, SMLoc &E) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return true; StringRef ShiftName = Tok.getString(); if (ShiftName == "lsl" || ShiftName == "LSL") - St = Lsl; + St = ARM_AM::lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") - St = Lsr; + St = ARM_AM::lsr; else if (ShiftName == "asr" || ShiftName == "ASR") - St = Asr; + St = ARM_AM::asr; else if (ShiftName == "ror" || ShiftName == "ROR") - St = Ror; + St = ARM_AM::ror; else if (ShiftName == "rrx" || ShiftName == "RRX") - St = Rrx; + St = ARM_AM::rrx; else return true; Parser.Lex(); // Eat shift type token. // Rrx stands alone. - if (St == Rrx) + if (St == ARM_AM::rrx) return false; // Otherwise, there must be a '#' and a shift amount. @@ -1286,6 +1604,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, case AsmToken::Identifier: if (!TryParseRegisterWithWriteBack(Operands)) return false; + if (!TryParseShiftRegister(Operands)) + return false; + // Fall through for the Identifier case that is not a register or a // special name. diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 78d73d3..bdce2c4 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -18,6 +18,7 @@ #include "ARMDisassembler.h" #include "ARMDisassemblerCore.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetRegistry.h" @@ -94,6 +95,9 @@ static unsigned decodeARMInstruction(uint32_t &insn) { // As a result, the decoder fails to decode USAT properly. if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1) return ARM::USAT; + // As a result, the decoder fails to decode UQADD16 properly. + if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1) + return ARM::UQADD16; // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. // As a result, the decoder fails to decode UMULL properly. @@ -280,6 +284,24 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { } } +// Helper function for special case handling of PLD (literal) and friends. +// See A8.6.117 T1 & T2 and friends for why we morphed the opcode +// before returning it. +static unsigned T2Morph2PLDLiteral(unsigned Opcode) { + switch (Opcode) { + default: + return Opcode; // Return unmorphed opcode. + + case ARM::t2PLDi8: case ARM::t2PLDs: + case ARM::t2PLDWi12: case ARM::t2PLDWi8: + case ARM::t2PLDWs: + return ARM::t2PLDi12; + + case ARM::t2PLIi8: case ARM::t2PLIs: + return ARM::t2PLIi12; + } +} + /// decodeThumbSideEffect is a decorator function which can potentially twiddle /// the instruction or morph the returned opcode under Thumb2.
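The morph checks here and the disassembler below lean on a slice(insn, hi, lo) bit-field helper; a self-contained sketch of that idiom follows (the real helper lives in ARMDisassemblerCore and may differ in detail, so the name here is an assumed stand-in):

    #include <stdint.h>

    // Extract insn[hi:lo] as an unsigned value; requires hi >= lo, hi <= 31.
    static inline uint32_t sliceBits(uint32_t Insn, unsigned Hi, unsigned Lo) {
      uint32_t Mask = (Hi - Lo >= 31) ? 0xFFFFFFFFu
                                      : ((1u << (Hi - Lo + 1)) - 1u);
      return (Insn >> Lo) & Mask;
    }
    // e.g. sliceBits(Insn, 31, 25) == 0x7C selects the Thumb2 PLD/PLI space
    // tested by the PLD (literal) morph in decodeThumbSideEffect below.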
/// @@ -330,12 +352,27 @@ static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { } // --------- Transform End Marker --------- + unsigned unmorphed = decodeThumbInstruction(insn); + // See, for example, A6.3.7 Load word: Table A6-18 Load word. // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode // before returning it to our caller. if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1 - && slice(insn, 19, 16) == 15) - return T2Morph2LoadLiteral(decodeThumbInstruction(insn)); + && slice(insn, 19, 16) == 15) { + unsigned morphed = T2Morph2LoadLiteral(unmorphed); + if (morphed != unmorphed) + return morphed; + } + + // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for + // why we morphed the opcode before returning it to our caller. + if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF + && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1 + && slice(insn, 19, 16) == 15) { + unsigned morphed = T2Morph2PLDLiteral(unmorphed); + if (morphed != unmorphed) + return morphed; + } // One last check for NEON/VFP instructions. if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1) @@ -375,21 +412,23 @@ bool ARMDisassembler::getInstruction(MCInst &MI, Size = 4; DEBUG({ - errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) + errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode) << " Format=" << stringForARMFormat(Format) << '(' << (int)Format << ")\n"; showBitVector(errs(), insn); }); - ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); + OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); if (!Builder) return false; + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; - delete Builder; - return true; } @@ -398,7 +437,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, const MemoryObject &Region, uint64_t Address, raw_ostream &os) const { - // The Thumb instruction stream is a sequence of halhwords. + // The Thumb instruction stream is a sequence of halfwords. // This represents the first halfword as well as the machine instruction // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top @@ -463,17 +502,19 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, showBitVector(errs(), insn); }); - ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); + OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); if (!Builder) return false; Builder->SetSession(const_cast<Session *>(&SO)); + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; - delete Builder; - return true; } diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index bac68dd..642829c 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -17,6 +17,7 @@ #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "ARMMCExpr.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -82,10 +83,28 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) { // FIXME: Auto-gened? static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) { - // For this purpose, we can treat rGPR as if it were GPR. 
- if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID; + if (RegClassID == ARM::rGPRRegClassID) { + // Check for the register numbers 13 and 15 that are not permitted for many + // Thumb register specifiers. + if (RawRegister == 13 || RawRegister == 15) { + B->SetErr(-1); + return 0; + } + // For this purpose, we can treat rGPR as if it were GPR. + RegClassID = ARM::GPRRegClassID; + } // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm(). + // A7.3 register encoding + // Qd -> bit[12] == 0 + // Qn -> bit[16] == 0 + // Qm -> bit[0] == 0 + // + // If one of these bits is 1, the instruction is UNDEFINED. + if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) { + B->SetErr(-1); + return 0; + } unsigned RegNum = RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister; @@ -497,14 +516,66 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } +// A8.6.94 MLA +// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE; +// +// A8.6.105 MUL +// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; +// +// A8.6.246 UMULL +// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE; +// if dHi == dLo then UNPREDICTABLE; +static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) { + unsigned R19_16 = slice(insn, 19, 16); + unsigned R15_12 = slice(insn, 15, 12); + unsigned R11_8 = slice(insn, 11, 8); + unsigned R3_0 = slice(insn, 3, 0); + switch (Opcode) { + default: + // Did we miss an opcode? + DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!"); + return false; + case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT: + case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT: + case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR: + case ARM::USADA8: + if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15) + return true; + return false; + case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR: + case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT: + case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX: + // A8.6.167 SMLAD & A8.6.172 SMLSD + case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX: + case ARM::USAD8: + if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15) + return true; + return false; + case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL: + case ARM::UMULL: + case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT: + case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX: + if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15) + return true; + if (R19_16 == R15_12) + return true; + return false; + } +} + // Multiply Instructions. -// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLS: +// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR, +// SMMLS, SMMLSR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience): // Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12} +// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is +// only for {d, n, m}.
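An aside before the multiply-form comment continues below: every check in BadRegsMulFrm follows one pattern, rejecting encodings in which a forbidden field names r15. A generic sketch of that pattern (assumed helper, not in the tree):

    static bool anyRegIs15(const unsigned *Regs, unsigned N) {
      for (unsigned i = 0; i != N; ++i)
        if (Regs[i] == 15)
          return true;  // PC in this field makes the encoding UNPREDICTABLE
      return false;
    }
    // Usage against the fields decoded above:
    //   unsigned R[] = { R19_16, R15_12, R11_8, R3_0 };
    //   if (anyRegIs15(R, 4)) return true;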
// -// MUL, SMMUL, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT: +// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD, +// SMUADX, and USAD8 (for convenience): // Rd{19-16} Rn{3-0} Rm{11-8} // -// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT: +// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT, +// SMLALDX, SMLSLD, SMLSLDX: // RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8} // // The mapping of the multiply registers to the "regular" ARM registers, where @@ -531,6 +602,10 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, && OpInfo[2].RegClass == ARM::GPRRegClassID && "Expect three register operands"); + // Sanity check for the register encodings. + if (BadRegsMulFrm(Opcode, insn)) + return false; + // Instructions with two destination registers have RdLo{15-12} first. if (NumDefs == 2) { assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && @@ -618,18 +693,38 @@ static inline unsigned GetCopOpc(uint32_t insn) { static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr"); + assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr"); unsigned &OpIdx = NumOpsAdded; + // A8.6.92 + // if coproc == '101x' then SEE "Advanced SIMD and VFP" + // But since the special instructions have more explicit encoding bits + // specified, if coproc == 10 or 11, we should reject it as invalid. + unsigned coproc = GetCoprocessor(insn); + if ((Opcode == ARM::MCR || Opcode == ARM::MCRR || + Opcode == ARM::MRC || Opcode == ARM::MRRC) && + (coproc == 10 || coproc == 11)) { + DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n"); + return false; + } + bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 || Opcode == ARM::MRRC || Opcode == ARM::MRRC2); + // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}). bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2); bool LdStCop = LdStCopOpcode(Opcode); + bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2); OpIdx = 0; - MI.addOperand(MCOperand::CreateImm(GetCoprocessor(insn))); + if (RtOut) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + decodeRd(insn)))); + ++OpIdx; + } + MI.addOperand(MCOperand::CreateImm(coproc)); + ++OpIdx; if (LdStCop) { // Unindex if P:W = 0b00 --> _OPTION variant @@ -639,26 +734,34 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); + OpIdx += 2; if (PW) { MI.addOperand(MCOperand::CreateReg(0)); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + const TargetInstrDesc &TID = ARMInsts[Opcode]; + unsigned IndexMode = + (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2, - ARM_AM::no_shift); + ARM_AM::no_shift, IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); - OpIdx = 5; + OpIdx += 2; } else { MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0))); - OpIdx = 4; + ++OpIdx; } } else { MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn) : GetCopOpc1(insn, NoGPR))); + ++OpIdx; - MI.addOperand(NoGPR ?
MCOperand::CreateImm(decodeRd(insn)) - : MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); + if (!RtOut) { + MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) + : MCOperand::CreateReg( + getRegisterEnum(B, ARM::GPRRegClassID, + decodeRd(insn)))); + ++OpIdx; + } MI.addOperand(OneCopOpc ? MCOperand::CreateReg( getRegisterEnum(B, ARM::GPRRegClassID, @@ -667,7 +770,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(decodeRm(insn))); - OpIdx = 5; + OpIdx += 2; if (!OneCopOpc) { MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn))); @@ -679,8 +782,8 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, } // Branch Instructions. -// BLr9: SignExtend(Imm24:'00', 32) -// Bcc, BLr9_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 +// BL: SignExtend(Imm24:'00', 32) +// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 // SMC: ZeroExtend(imm4, 32) // SVC: ZeroExtend(Imm24, 32) // @@ -735,6 +838,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // MSRi take a mask, followed by one so_imm operand. The mask contains the // R Bit in bit 4, and the special register fields in bits 3-0. if (Opcode == ARM::MSRi) { + // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate) + // The hints instructions have more specific encodings, so if mask == 0, + // we should reject this as an invalid instruction. + if (slice(insn, 19, 16) == 0) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ | slice(insn, 19, 16) /* Special Reg */ )); // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. @@ -760,11 +868,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - assert((Opcode == ARM::Bcc || Opcode == ARM::BLr9 || Opcode == ARM::BLr9_pred + assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred || Opcode == ARM::SMC || Opcode == ARM::SVC) && "Unexpected Opcode"); - assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected"); + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); int Imm32 = 0; if (Opcode == ARM::SMC) { @@ -778,12 +886,6 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Imm26 = slice(insn, 23, 0) << 2; //Imm32 = signextend<signed int, 26>(Imm26); Imm32 = SignExtend32<26>(Imm26); - - // When executing an ARM instruction, PC reads as the address of the current - // instruction plus 8. The assembler subtracts 8 from the difference - // between the branch instruction and the target address, disassembler has - // to add 8 to compensate. - Imm32 += 8; } MI.addOperand(MCOperand::CreateImm(Imm32)); @@ -793,7 +895,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // Misc. Branch Instructions. -// BLXr9, BXr9 +// BLX, BLXi, BX // BX, BX_RET static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -809,8 +911,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR) return true; - // BLXr9 and BX take one GPR reg. - if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) { + // BLX and BX take one GPR reg. 
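An aside, as a worked example of the SignExtend32<26> reconstruction used for the branch targets above (SignExtend32 is from llvm/Support/MathExtras.h; note the old +8 PC compensation was deliberately dropped by this change, so the decoder now keeps the raw relative offset):

    uint32_t Imm26 = 0xFFFFFEu << 2;                // imm24:'00' = 0x03FFFFF8
    int32_t Imm32 = llvm::SignExtend32<26>(Imm26);  // bit 25 set -> -8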
+ if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred || + Opcode == ARM::BX) { assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -819,6 +922,17 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } + // BLXi takes imm32 (the PC offset). + if (Opcode == ARM::BLXi) { + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); + // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}. + unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1; + int Imm32 = SignExtend32<26>(Imm26); + MI.addOperand(MCOperand::CreateImm(Imm32)); + OpIdx = 1; + return true; + } + return false; } @@ -837,6 +951,24 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { return true; } +// Standard data-processing instructions allow PC as a register specifier, +// but we should reject other DPFrm instructions with PC as registers. +static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) { + switch (Opcode) { + default: + // Did we miss an opcode? + if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) { + DEBUG(errs() << "DPFrm with bad reg specifier(s)\n"); + return true; + } + case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr: + case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr: + case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr: + case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr: + return false; + } +} + // A major complication is the fact that some of the saturating add/subtract // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. @@ -864,6 +996,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Special-case handling of BFC/BFI/SBFX/UBFX. if (Opcode == ARM::BFC || Opcode == ARM::BFI) { + // A8.6.17 BFC & A8.6.18 BFI + // Sanity check Rd. + if (decodeRd(insn) == 15) + return false; MI.addOperand(MCOperand::CreateReg(0)); if (Opcode == ARM::BFI) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -879,6 +1015,9 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { + // Sanity check Rd and Rm. + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); @@ -915,15 +1054,21 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are // routed here as well. // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); + if (BadRegsDPFrm(Opcode, insn)) + return false; MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, ARM::GPRRegClassID, RmRn? decodeRn(insn) : decodeRm(insn)))); ++OpIdx; } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { + // These two instructions don't allow d as 15. + if (decodeRd(insn) == 15) + return false; // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}). 
assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0); - MI.addOperand(MCOperand::CreateImm(Imm16)); + if (!B->tryAddingSymbolicOperand(Imm16, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Imm16)); ++OpIdx; } else { // We have a reg/imm form. @@ -992,6 +1137,21 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Rs) { + // If Inst{7} != 0, we should reject this insn as an invalid encoding. + if (slice(insn, 7, 7)) + return false; + + // A8.6.3 ADC (register-shifted register) + // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE; + // + // This also accounts for shift instructions (register) where, fortunately, + // Inst{19-16} = 0b0000. + // A8.6.89 LSL (register) + // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || + decodeRm(insn) == 15 || decodeRs(insn) == 15) + return false; + // Register-controlled shifts: [Rm, Rs, shift]. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); @@ -1015,6 +1175,71 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack, + bool Imm) { + const StringRef Name = ARMInsts[Opcode].Name; + unsigned Rt = decodeRd(insn); + unsigned Rn = decodeRn(insn); + unsigned Rm = decodeRm(insn); + unsigned P = getPBit(insn); + unsigned W = getWBit(insn); + + if (Store) { + // Only STR (immediate, register) allows PC as the source. + if (Name.startswith("STRB") && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + if (WBack && (Rn == 15 || Rn == Rt)) { + DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); + return true; + } + if (!Imm && Rm == 15) { + DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + // Only LDR (immediate, register) allows PC as the destination. + if (Name.startswith("LDRB") && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + if (Imm) { + // Immediate + if (Rn == 15) { + // The literal form must be in offset mode; it's an encoding error + // otherwise. + if (!(P == 1 && W == 0)) { + DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n"); + return true; + } + // LDRB (literal) does not allow PC as the destination. + if (Opcode != ARM::LDRi12 && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + // Write back while Rn == Rt does not make sense. 
+ if (WBack && (Rn == Rt)) { + DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); + return true; + } + } + } else { + // Register + if (Rm == 15) { + DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); + return true; + } + if (WBack && (Rn == 15 || Rn == Rt)) { + DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); + return true; + } + } + } + return false; +} + static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { @@ -1077,19 +1302,41 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpIdx + 1 >= NumOps) return false; - assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass < 0) && - "Expect 1 reg operand followed by 1 imm operand"); + if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0)) + return false; ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + unsigned IndexMode = + (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getIBit(insn) == 0) { - MI.addOperand(MCOperand::CreateReg(0)); + // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2). + // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already + // been populated. + if (isPrePost) { + MI.addOperand(MCOperand::CreateReg(0)); + OpIdx += 1; + } - // Disassemble the 12-bit immediate offset. unsigned Imm12 = slice(insn, 11, 0); - unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift); - MI.addOperand(MCOperand::CreateImm(Offset)); + if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 || + Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) { + // Disassemble the 12-bit immediate offset, which is the second operand in + // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm). + int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12; + MI.addOperand(MCOperand::CreateImm(Offset)); + } else { + // Disassemble the 12-bit immediate offset, which is the second operand in + // $am2offset => (ops GPR, i32imm). + unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift, + IndexMode); + MI.addOperand(MCOperand::CreateImm(Offset)); + } + OpIdx += 1; } else { + // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid. + if (slice(insn,4,4) == 1) + return false; + // Disassemble the offset reg (Rm), shift type, and immediate shift length. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); @@ -1101,9 +1348,9 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.4.1. Possible rrx or shift amount of 32... getImmShiftSE(ShOp, ShImm); MI.addOperand(MCOperand::CreateImm( - ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); + ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode))); + OpIdx += 2; } - OpIdx += 2; return true; } @@ -1125,7 +1372,7 @@ static bool HasDualReg(unsigned Opcode) { case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST: return true; - } + } } static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, @@ -1153,8 +1400,6 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; } - bool DualReg = HasDualReg(Opcode); - // Disassemble the dst/src operand. 
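The offset branches above all reduce to the same U-bit rule: U == 1 adds the 12-bit offset, U == 0 subtracts it. A sketch of the plain signed form used by the LDRi12/STRi12 family (local helper name, not this file's API):

    #include <cassert>
    #include <cstdint>

    // Fold the U bit and imm12 into one signed byte offset.
    static int32_t signedImm12(bool UBit, uint32_t Imm12) {
      return UBit ? (int32_t)Imm12 : -(int32_t)Imm12;
    }

    int main() {
      assert(signedImm12(true, 4) == 4);   // e.g. ldr r0, [r1, #4]
      assert(signedImm12(false, 4) == -4); // e.g. ldr r0, [r1, #-4]
      return 0;
    }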
if (OpIdx >= NumOps) return false; @@ -1165,8 +1410,8 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRd(insn)))); ++OpIdx; - // Fill in LDRD and STRD's second operand. - if (DualReg) { + // Fill in LDRD and STRD's second Rt operand. + if (HasDualReg(Opcode)) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn) + 1))); ++OpIdx; @@ -1188,7 +1433,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) - && "Index mode or tied_to operand expected"); + && "Offset mode or tied_to operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1204,19 +1449,22 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + unsigned IndexMode = + (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getAM3IBit(insn) == 1) { MI.addOperand(MCOperand::CreateReg(0)); // Disassemble the 8-bit immediate offset. unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF; unsigned Imm4L = insn & 0xF; - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L); + unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L, + IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm). MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0); + unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); } OpIdx += 2; @@ -1236,13 +1484,13 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // The algorithm for disassembly of LdStMulFrm is different from others because -// it explicitly populates the two predicate operands after operand 0 (the base) -// and operand 1 (the AM4 mode imm). After operand 3, we need to populate the -// reglist with each affected register encoded as an MCOperand. +// it explicitly populates the two predicate operands after the base register. +// After that, we need to populate the reglist with each affected register +// encoded as an MCOperand. static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); + assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4"); NumOpsAdded = 0; unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); @@ -1260,8 +1508,10 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); // Handling the two predicate operands before the reglist. - int64_t CondVal = insn >> ARMII::CondShift; - MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ?
0xE : CondVal)); + int64_t CondVal = getCondField(insn); + if (CondVal == 0xF) + return false; + MI.addOperand(MCOperand::CreateImm(CondVal)); MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); NumOpsAdded += 3; @@ -1352,6 +1602,12 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; + // Sanity check the registers, which should not be 15. + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + if (ThreeReg && decodeRn(insn) == 15) + return false; + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; @@ -1376,7 +1632,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ARM_AM::ShiftOpc Opc = ARM_AM::no_shift; if (Opcode == ARM::PKHBT) Opc = ARM_AM::lsl; - else if (Opcode == ARM::PKHBT) + else if (Opcode == ARM::PKHTB) Opc = ARM_AM::asr; getImmShiftSE(Opc, ShiftAmt); MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt))); @@ -1391,6 +1647,11 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + // A8.6.183 SSAT + // if d == 15 || n == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + const TargetInstrDesc &TID = ARMInsts[Opcode]; NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands @@ -1429,6 +1690,11 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + // A8.6.220 SXTAB + // if d == 15 || m == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1611,7 +1877,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.295 vcvt (floating-point <-> integer) // Int to FP: VSITOD, VSITOS, VUITOD, VUITOS // FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S -// +// // A8.6.297 vcvt (floating-point and fixed-point) // Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, @@ -1800,15 +2066,14 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // VFP Load/Store Multiple Instructions. -// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and -// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is -// followed by a reglist of either DPR(s) or SPR(s). +// We have an optional write back reg, the base, and two predicate operands. +// It is then followed by a reglist of either DPR(s) or SPR(s). // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5"); + assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4"); unsigned &OpIdx = NumOpsAdded; @@ -1827,25 +2092,18 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); - // Next comes the AM4 Opcode. 
- ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); - // Must be either "ia" or "db" submode. - if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { - DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n"); - return false; - } - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); - // Handling the two predicate operands before the reglist. - int64_t CondVal = insn >> ARMII::CondShift; - MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal)); + int64_t CondVal = getCondField(insn); + if (CondVal == 0xF) + return false; + MI.addOperand(MCOperand::CreateImm(CondVal)); MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 4; + OpIdx += 3; - bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSDB || + bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD || - Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSDB || + Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; @@ -1855,6 +2113,11 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Fill the variadic part of reglist. unsigned char Imm8 = insn & 0xFF; unsigned Regs = isSPVFP ? Imm8 : Imm8/2; + + // Apply some sanity checks before proceeding. + if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16)) + return false; + for (unsigned i = 0; i < Regs; ++i) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD + i))); @@ -2136,7 +2399,7 @@ static unsigned decodeN3VImm(uint32_t insn) { // Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, - BO B) { + unsigned alignment, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2180,9 +2443,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); + // addrmode6 := (ops GPR:$addr, i32imm) MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); - MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? + MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment OpIdx += 2; if (WB) { @@ -2230,9 +2494,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); + // addrmode6 := (ops GPR:$addr, i32imm) MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); - MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? + MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment OpIdx += 2; if (WB) { @@ -2263,6 +2528,92 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +// A8.6.308, A8.6.311, A8.6.314, A8.6.317. 
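The Align4OneLaneInst utility that follows maps (elem, size, index_align) to the implied alignment in bits, returning false for UNDEFINED encodings; callers later pass alignment/8 bytes into the addrmode6 operand. One concrete row of the A8.6.308 (VLD1 single element to one lane) table, as a hedged standalone check:

    #include <cassert>

    int main() {
      // VLD1 (single element to one lane), size == 0b01 (halfwords):
      // index_align<1:0> == 0b01 selects 16-bit alignment,
      // index_align<1:0> == 0b00 selects standard (unaligned) access.
      unsigned index_align = 0x1; // hypothetical field value
      unsigned alignment = 0;     // 0 means standard alignment
      if ((index_align & 0x3) == 0x1)
        alignment = 16;
      assert(alignment == 16);
      return 0;
    }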
+static bool Align4OneLaneInst(unsigned elem, unsigned size, + unsigned index_align, unsigned &alignment) { + unsigned bits = 0; + switch (elem) { + default: + return false; + case 1: + // A8.6.308 + if (size == 0) + return slice(index_align, 0, 0) == 0; + else if (size == 1) { + bits = slice(index_align, 1, 0); + if (bits != 0 && bits != 1) + return false; + if (bits == 1) + alignment = 16; + return true; + } else if (size == 2) { + bits = slice(index_align, 2, 0); + if (bits != 0 && bits != 3) + return false; + if (bits == 3) + alignment = 32; + return true; + } + return true; + case 2: + // A8.6.311 + if (size == 0) { + if (slice(index_align, 0, 0) == 1) + alignment = 16; + return true; + } else if (size == 1) { + if (slice(index_align, 0, 0) == 1) + alignment = 32; + return true; + } else if (size == 2) { + if (slice(index_align, 1, 1) != 0) + return false; + if (slice(index_align, 0, 0) == 1) + alignment = 64; + return true; + } + return true; + case 3: + // A8.6.314 + if (size == 0) { + if (slice(index_align, 0, 0) != 0) + return false; + return true; + } else if (size == 1) { + if (slice(index_align, 0, 0) != 0) + return false; + return true; + } else if (size == 2) { + if (slice(index_align, 1, 0) != 0) + return false; + return true; + } + return true; + case 4: + // A8.6.317 + if (size == 0) { + if (slice(index_align, 0, 0) == 1) + alignment = 32; + return true; + } else if (size == 1) { + if (slice(index_align, 0, 0) == 1) + alignment = 64; + return true; + } else if (size == 2) { + bits = slice(index_align, 1, 0); + if (bits == 3) + return false; + if (bits == 1) + alignment = 64; + else if (bits == 2) + alignment = 128; + return true; + } + return true; + } +} + // A7.7 // If L (Inst{21}) == 0, store instructions. // Find out about double-spaced-ness of the Opcode and pass it on to @@ -2272,11 +2623,33 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, const StringRef Name = ARMInsts[Opcode].Name; bool DblSpaced = false; + // 0 represents standard alignment, i.e., unaligned data access. + unsigned alignment = 0; + + unsigned elem = 0; // legal values: {1, 2, 3, 4} + if (Name.startswith("VST1") || Name.startswith("VLD1")) + elem = 1; + + if (Name.startswith("VST2") || Name.startswith("VLD2")) + elem = 2; + + if (Name.startswith("VST3") || Name.startswith("VLD3")) + elem = 3; + + if (Name.startswith("VST4") || Name.startswith("VLD4")) + elem = 4; if (Name.find("LN") != std::string::npos) { // To one lane instructions. // See, for example, A8.6.317 VLD4 (single 4-element structure to one lane). + // Utility function takes number of elements, size, and index_align. + if (!Align4OneLaneInst(elem, + slice(insn, 11, 10), + slice(insn, 7, 4), + alignment)) + return false; + // <size> == 16 && Inst{5} == 1 --> DblSpaced = true if (Name.endswith("16") || Name.endswith("16_UPD")) DblSpaced = slice(insn, 5, 5) == 1; @@ -2284,30 +2657,102 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, // <size> == 32 && Inst{6} == 1 --> DblSpaced = true if (Name.endswith("32") || Name.endswith("32_UPD")) DblSpaced = slice(insn, 6, 6) == 1; - + } else if (Name.find("DUP") != std::string::npos) { + // Single element (or structure) to all lanes. + // Inst{9-8} encodes the number of element(s) in the structure, with: + // 0b00 (VLD1DUP) (for this, the a bit makes sense only for data size 16 and 32)
+ // 0b01 (VLD2DUP) + // 0b10 (VLD3DUP) (for this, a bit must be encoded as 0) + // 0b11 (VLD4DUP) + // + // Inst{7-6} encodes the data size, with: + // 0b00 => 8, 0b01 => 16, 0b10 => 32 + // + // Inst{4} (the a bit) encodes the align action (0: standard alignment) + unsigned elem = slice(insn, 9, 8) + 1; + unsigned a = slice(insn, 4, 4); + if (elem != 3) { + // 0b11 is not a valid encoding for Inst{7-6}. + if (slice(insn, 7, 6) == 3) + return false; + unsigned data_size = 8 << slice(insn, 7, 6); + // For VLD1DUP, a bit makes sense only for data size of 16 and 32. + if (a && data_size == 8) + return false; + + // Now we can calculate the alignment! + if (a) + alignment = elem * data_size; + } else { + if (a) { + // A8.6.315 VLD3 (single 3-element structure to all lanes) + // The a bit must be encoded as 0. + return false; + } + } } else { // Multiple n-element structures with type encoded as Inst{11-8}. // See, for example, A8.6.316 VLD4 (multiple 4-element structures). - // n == 2 && type == 0b1001 -> DblSpaced = true - if (Name.startswith("VST2") || Name.startswith("VLD2")) - DblSpaced = slice(insn, 11, 8) == 9; - - // n == 3 && type == 0b0101 -> DblSpaced = true - if (Name.startswith("VST3") || Name.startswith("VLD3")) - DblSpaced = slice(insn, 11, 8) == 5; - - // n == 4 && type == 0b0001 -> DblSpaced = true - if (Name.startswith("VST4") || Name.startswith("VLD4")) - DblSpaced = slice(insn, 11, 8) == 1; - + // Inst{5-4} encodes alignment. + unsigned align = slice(insn, 5, 4); + switch (align) { + default: + break; + case 1: + alignment = 64; break; + case 2: + alignment = 128; break; + case 3: + alignment = 256; break; + } + + unsigned type = slice(insn, 11, 8); + // Reject UNDEFINED instructions based on type and align. + // Plus set DblSpaced flag where appropriate. 
+ switch (elem) { + default: + break; + case 1: + // n == 1 + // A8.6.307 & A8.6.391 + if ((type == 7 && slice(align, 1, 1) == 1) || + (type == 10 && align == 3) || + (type == 6 && slice(align, 1, 1) == 1)) + return false; + break; + case 2: + // n == 2 && type == 0b1001 -> DblSpaced = true + // A8.6.310 & A8.6.393 + if ((type == 8 || type == 9) && align == 3) + return false; + DblSpaced = (type == 9); + break; + case 3: + // n == 3 && type == 0b0101 -> DblSpaced = true + // A8.6.313 & A8.6.395 + if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1) + return false; + DblSpaced = (type == 5); + break; + case 4: + // n == 4 && type == 0b0001 -> DblSpaced = true + // A8.6.316 & A8.6.397 + if (slice(insn, 7, 6) == 3) + return false; + DblSpaced = (type == 1); + break; + } } return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded, - slice(insn, 21, 21) == 0, DblSpaced, B); + slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B); } // VMOV (immediate) // Qd/Dd imm +// VBIC (immediate) +// VORR (immediate) +// Qd/Dd imm src(=Qd/Dd) static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -2334,12 +2779,20 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, case ARM::VMOVv8i16: case ARM::VMVNv4i16: case ARM::VMVNv8i16: + case ARM::VBICiv4i16: + case ARM::VBICiv8i16: + case ARM::VORRiv4i16: + case ARM::VORRiv8i16: esize = ESize16; break; case ARM::VMOVv2i32: case ARM::VMOVv4i32: case ARM::VMVNv2i32: case ARM::VMVNv4i32: + case ARM::VBICiv2i32: + case ARM::VBICiv4i32: + case ARM::VORRiv2i32: + case ARM::VORRiv4i32: esize = ESize32; break; case ARM::VMOVv1i64: @@ -2347,7 +2800,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, esize = ESize64; break; default: - assert(0 && "Unreachable code!"); + assert(0 && "Unexpected opcode!"); return false; } @@ -2356,6 +2809,16 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize))); NumOpsAdded = 2; + + // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in. + if (NumOps >= 3 && + (OpInfo[2].RegClass == ARM::DPRRegClassID || + OpInfo[2].RegClass == ARM::QPRRegClassID)) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, + decodeNEONRd(insn)))); + NumOpsAdded += 1; + } + return true; } @@ -2376,7 +2839,7 @@ enum N2VFlag { // // Vector Move Long: // Qd Dm -// +// // Vector Move Narrow: // Dd Qm // @@ -2518,7 +2981,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected"); // Add the imm operand. - + // VSHLL has maximum shift count as the imm, inferred from its size. unsigned Imm; switch (Opcode) { @@ -2631,7 +3094,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, // N3RegFrm. if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ) return true; - + // Dm = Inst{5:3-0} => NEON Rm // or // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise @@ -2770,7 +3233,7 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ElemSize esize = Opcode == ARM::VGETLNi32 ? ESize32 : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? 
ESize16 - : ESize32); + : ESize8); // Rt = Inst{15-12} => ARM Rd MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -2852,17 +3315,6 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } -// A8.6.41 DMB -// A8.6.42 DSB -// A8.6.49 ISB -static inline bool MemBarrierInstr(uint32_t insn) { - unsigned op7_4 = slice(insn, 7, 4); - if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6)) - return true; - - return false; -} - static inline bool PreLoadOpcode(unsigned Opcode) { switch(Opcode) { case ARM::PLDi12: case ARM::PLDrs: @@ -2878,8 +3330,8 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 3 operands. - // PLDi, PLDWi, PLIi: addrmode_imm12 - // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg + // PLDi12, PLDWi12, PLIi12: addrmode_imm12 + // PLDrs, PLDWrs, PLIrs: ldst_so_reg MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); @@ -2888,10 +3340,19 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, || Opcode == ARM::PLIi12) { unsigned Imm12 = slice(insn, 11, 0); bool Negative = getUBit(insn) == 0; + + // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12. + if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) { + DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n"); + MI.setOpcode(ARM::PLDi12); + } + // -0 is represented specially. All other values are as normal. + int Offset = Negative ? -1 * Imm12 : Imm12; if (Imm12 == 0 && Negative) - Imm12 = INT32_MIN; - MI.addOperand(MCOperand::CreateImm(Imm12)); + Offset = INT32_MIN; + + MI.addOperand(MCOperand::CreateImm(Offset)); NumOpsAdded = 2; } else { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -2917,14 +3378,20 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - if (MemBarrierInstr(insn)) { - // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care - // of within the generic ARMBasicMCBuilder::BuildIt() method. - // + if (Opcode == ARM::DMB || Opcode == ARM::DSB) { // Inst{3-0} encodes the memory barrier option for the variants. - MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); - NumOpsAdded = 1; - return true; + unsigned opt = slice(insn, 3, 0); + switch (opt) { + case ARM_MB::SY: case ARM_MB::ST: + case ARM_MB::ISH: case ARM_MB::ISHST: + case ARM_MB::NSH: case ARM_MB::NSHST: + case ARM_MB::OSH: case ARM_MB::OSHST: + MI.addOperand(MCOperand::CreateImm(opt)); + NumOpsAdded = 1; + return true; + default: + return false; + } } switch (Opcode) { @@ -2936,6 +3403,11 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, case ARM::WFI: case ARM::SEV: return true; + case ARM::SWP: + case ARM::SWPB: + // SWP, SWPB: Rd Rm Rn + // Delegate to DisassembleLdStExFrm().... + return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); default: break; } @@ -2950,20 +3422,32 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // opcodes which match the same real instruction. This is needed since there's // no current handling of optional arguments. Fix here when a better handling // of optional arguments is implemented. 
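The CPS handling just below splits three fields out of the instruction word. A hedged sketch of that split, with the field positions taken from the slice() calls below (per A8.6.38, imod = 0b10 means enable and 0b11 means disable):

    #include <cassert>
    #include <cstdint>

    struct CPSFields {
      unsigned imod;   // Inst{19-18}
      unsigned iflags; // Inst{8-6}: the A, I and F bits
      unsigned mode;   // Inst{4-0}: target processor mode
    };

    static CPSFields splitCPS(uint32_t insn) {
      CPSFields F;
      F.imod = (insn >> 18) & 0x3u;
      F.iflags = (insn >> 6) & 0x7u;
      F.mode = insn & 0x1Fu;
      return F;
    }

    int main() {
      // CPSID if, #31: imod = 0b11, iflags = 0b011, mode = 0b11111.
      uint32_t insn = (0x3u << 18) | (0x3u << 6) | 0x1Fu;
      CPSFields F = splitCPS(insn);
      assert(F.imod == 3 && F.iflags == 3 && F.mode == 31);
      return 0;
    }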
- if (Opcode == ARM::CPS3p) { + if (Opcode == ARM::CPS3p) { // M = 1 + // Let's reject these impossible imod values by returning false: + // 1. (imod=0b01) + // + // AsmPrinter cannot handle imod=0b00, plus (imod=0b00,M=1,iflags!=0) is an + // invalid combination, so we just check for imod=0b00 here. + if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode NumOpsAdded = 3; return true; } - if (Opcode == ARM::CPS2p) { + if (Opcode == ARM::CPS2p) { // mode = 0, M = 0 + // Let's reject these impossible imod values by returning false: + // 1. (imod=0b00,M=0) + // 2. (imod=0b01) + if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags NumOpsAdded = 2; return true; } - if (Opcode == ARM::CPS1p) { + if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1 MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode NumOpsAdded = 1; return true; @@ -3142,7 +3626,7 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, return false; } - + /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process /// the possible Predicate and SBitModifier, to build the remaining MCOperand /// constituents. @@ -3154,6 +3638,7 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; const std::string &Name = ARMInsts[Opcode].Name; unsigned Idx = MI.getNumOperands(); + uint64_t TSFlags = ARMInsts[Opcode].TSFlags; // First, we check whether this instr specifies the PredicateOperand through // a pair of TargetOperandInfos with isPredicate() property. @@ -3173,14 +3658,23 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // like ARM. // // A8.6.16 B - if (Name == "t2Bcc") - MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22)))); - else if (Name == "tBcc") - MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8)))); - else + // Check for undefined encodings. + unsigned cond; + if (Name == "t2Bcc") { + if ((cond = slice(insn, 25, 22)) >= 14) + return false; + MI.addOperand(MCOperand::CreateImm(CondCode(cond))); + } else if (Name == "tBcc") { + if ((cond = slice(insn, 11, 8)) == 14) + return false; + MI.addOperand(MCOperand::CreateImm(CondCode(cond))); + } else MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); } else { // ARM instructions get their condition field from Inst{31-28}. + // We should reject Inst{31-28} = 0b1111 as invalid encoding. + if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF) + return false; MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); } } @@ -3243,3 +3737,84 @@ ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); } + +/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic +/// operand in place of the immediate Value in the MCInst. The immediate +/// Value has had any PC adjustment made by the caller.
If the getOpInfo() /// function was set as part of the setupBuilderForSymbolicDisassembly() call /// then that function is called to get any symbolic information at the /// builder's Address for this instruction. If that returns non-zero then the /// symbolic information it returns is used to create an MCExpr and that is /// added as an operand to the MCInst. This function returns true if it adds /// an operand to the MCInst and false otherwise. +bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value, + uint64_t InstSize, + MCInst &MI) { + if (!GetOpInfo) + return false; + + struct LLVMOpInfo1 SymbolicOp; + SymbolicOp.Value = Value; + if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) + return false; + + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, *Ctx); + } + + if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) + MI.addOperand(MCOperand::CreateExpr(Expr)); + else + assert(0 && "bad SymbolicOp.VariantKind"); + + return true; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 9c30d33..a7ba141 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -22,12 +22,17 @@ #define ARMDISASSEMBLERCORE_H #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm-c/Disassembler.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMDisassembler.h" namespace llvm { +class MCContext; class ARMUtils { public: @@ -134,6 +139,31 @@ static inline void setSlice(unsigned &Bits, unsigned From, unsigned To, Bits |= (Val & Mask) << To; } +// Return an integer result equal to the number of bits of x that are ones.
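BitCount, defined next, uses the classic x &= x - 1 step: each iteration clears the lowest set bit, so the loop runs once per one-bit. A standalone copy with a couple of checks (counting reglist bits is its main use in these disassemblers):

    #include <cassert>
    #include <cstdint>

    static uint32_t popcount64(uint64_t x) {
      uint32_t c = 0;
      for (; x; ++c)
        x &= x - 1; // clears the least significant set bit
      return c;
    }

    int main() {
      assert(popcount64(0x00u) == 0);
      assert(popcount64(0xFFu) == 8); // e.g. a Thumb reglist naming r0-r7
      assert(popcount64(0xA5u) == 4);
      return 0;
    }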
+static inline uint32_t +BitCount (uint64_t x) +{ + // c accumulates the total bits set in x + uint32_t c; + for (c = 0; x; ++c) + { + x &= x - 1; // clear the least significant bit set + } + return c; +} + +static inline bool +BitIsSet (const uint64_t value, const uint64_t bit) +{ + return (value & (1ull << bit)) != 0; +} + +static inline bool +BitIsClear (const uint64_t value, const uint64_t bit) +{ + return (value & (1ull << bit)) == 0; +} + /// Various utilities for checking the target specific flags. /// A unary data processing instruction doesn't have an Rn operand. @@ -141,6 +171,12 @@ static inline bool isUnaryDP(uint64_t TSFlags) { return (TSFlags & ARMII::UnaryDP); } +/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111. +static inline bool isNEONDomain(uint64_t TSFlags) { + return (TSFlags & ARMII::DomainNEON) || + (TSFlags & ARMII::DomainNEONA8); +} + /// This four-bit field describes the addressing mode used. /// See also ARMBaseInstrInfo.h. static inline unsigned getAddrMode(uint64_t TSFlags) { @@ -196,7 +232,7 @@ private: public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) { + SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) { Err = 0; } @@ -255,6 +291,44 @@ private: assert(SP); return slice(SP->ITState, 7, 4); } + +private: + // + // Hooks for symbolic disassembly via the public 'C' interface. + // + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The pointer to the block of symbolic information for the above callback. + void *DisInfo; + // The assembly context for creating symbols and MCExprs in place of + // immediate operands when there is symbolic information. + MCContext *Ctx; + // The address of the instruction being disassembled. + uint64_t Address; + +public: + void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, + void *disInfo, MCContext *ctx, + uint64_t address) { + GetOpInfo = getOpInfo; + DisInfo = disInfo; + Ctx = ctx; + Address = address; + } + + uint64_t getBuilderAddress() const { return Address; } + + /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic + /// operand in place of the immediate Value in the MCInst. The immediate + /// Value has had any PC adjustment made by the caller. If the getOpInfo() + /// function was set as part of the setupBuilderForSymbolicDisassembly() call + /// then that function is called to get any symbolic information at the + /// builder's Address for this instruction. If that returns non-zero then the + /// symbolic information it returns is used to create an MCExpr and that is + /// added as an operand to the MCInst. This function returns true if it adds + /// an operand to the MCInst and false otherwise. + bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI); + }; } // namespace llvm diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 23372e0..8d39982 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -108,6 +108,8 @@ static inline bool IsGPR(unsigned RegClass) { // Utilities for 32-bit Thumb instructions. +static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; } + // Extract imm4: Inst{19-16}.
static inline unsigned getImm4(uint32_t insn) { return slice(insn, 19, 16); @@ -398,9 +400,17 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); - MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn) - : (Imm3 ? getT1Imm3(insn) - : getT1Imm5(insn)))); + unsigned Imm = 0; + if (UseRt) + Imm = getT1Imm8(insn); + else if (Imm3) + Imm = getT1Imm3(insn); + else { + Imm = getT1Imm5(insn); + ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11)); + getImmShiftSE(ShOp, Imm); + } + MI.addOperand(MCOperand::CreateImm(Imm)); } ++OpIdx; @@ -469,6 +479,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // tBX_RET: 0 operand // tBX_RET_vararg: Rm // tBLXr_r9: Rm +// tBRIND: Rm static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -476,11 +487,17 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumOps == 0) return true; - // BX/BLX has 1 reg operand: Rm. - if (NumOps == 1) { + // BX/BLX/tBRIND (indirect branch, i.e., mov pc, Rm) has 1 reg operand: Rm. + if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX_Rm || Opcode==ARM::tBRIND) { + if (Opcode != ARM::tBRIND) { + // Handling the two predicate operands before the reg operand. + if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + return false; + NumOpsAdded += 2; + } MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, getT1Rm(insn)))); - NumOpsAdded = 1; + NumOpsAdded += 1; return true; } @@ -598,7 +615,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // A6.2.4 Load/store single data item // -// Load/Store Register (reg|imm): tRd tRn imm5 tRm +// Load/Store Register (reg|imm): tRd tRn imm5|tRm // Load Register Signed Byte|Halfword: tRd tRn tRm static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -607,11 +624,6 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, const TargetOperandInfo *OpInfo = TID.OpInfo; unsigned &OpIdx = NumOpsAdded; - // Table A6-5 16-bit Thumb Load/store instructions - // opA = 0b0101 for STR/LDR (register) and friends. - // Otherwise, we have STR/LDR (immediate) and friends. - bool Imm5 = (opA != 5); - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::tGPRRegClassID @@ -624,28 +636,28 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, getT1tRn(insn)))); OpIdx = 2; - // We have either { imm5, tRm } or { tRm } remaining. - // Process the imm5 first. Note that STR/LDR (register) should skip the imm5 - // offset operand for t_addrmode_s[1|2|4]. + // We have either { imm5 } or { tRm } remaining. + // Note that STR/LDR (register) should skip the imm5 offset operand for + // t_addrmode_s[1|2|4]. assert(OpIdx < NumOps && "More operands expected"); if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - - MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0)); + // Table A6-5 16-bit Thumb Load/store instructions + // opA = 0b0101 for STR/LDR (register) and friends. + // Otherwise, we have STR/LDR (immediate) and friends.
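Architecturally, the Thumb1 immediate forms scale imm5 by the access size (Table A6-5: word offsets are imm5*4, halfword imm5*2, byte imm5*1). A sketch of that scaling rule, kept separate from how this file's t_addrmode_s[1|2|4] operands store the field:

    #include <cassert>

    // Byte offset implied by a Thumb1 imm5 field for a given access size.
    static unsigned thumb1Imm5ByteOffset(unsigned imm5, unsigned accessBytes) {
      return imm5 * accessBytes; // accessBytes: 1 (LDRB), 2 (LDRH), 4 (LDR)
    }

    int main() {
      assert(thumb1Imm5ByteOffset(3, 4) == 12); // ldr r0, [r1, #12]
      assert(thumb1Imm5ByteOffset(3, 2) == 6);  // ldrh r0, [r1, #6]
      assert(thumb1Imm5ByteOffset(3, 1) == 3);  // ldrb r0, [r1, #3]
      return 0;
    }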
+ assert(opA != 5 && "Immediate operand expected for this opcode"); + MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn))); + ++OpIdx; + } else { + // The next reg operand is tRm, the offset. + assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID + && "Thumb reg operand expected"); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, + getT1tRm(insn)))); ++OpIdx; } - - // The next reg operand is tRm, the offset. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID - && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg( - Imm5 ? 0 - : getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRm(insn)))); - ++OpIdx; - return true; } @@ -895,6 +907,10 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, } unsigned RegListBits = slice(insn, 7, 0); + if (BitCount(RegListBits) < 1) { + DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n"); + return false; + } // Fill the variadic part of reglist. for (unsigned i = 0; i < 8; ++i) @@ -945,6 +961,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, : (int)Imm8)); // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier(). + // But note that for tBcc, if cond = '1110' then UNDEFINED. + if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) { + DEBUG(errs() << "if cond = '1110' then UNDEFINED\n"); + return false; + } NumOpsAdded = 1; return true; @@ -965,11 +986,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Imm11 = getT1Imm11(insn); - // When executing a Thumb instruction, PC reads as the address of the current - // instruction plus 4. The assembler subtracts 4 from the difference between - // the branch instruction and the target address, disassembler has to add 4 to - // to compensate. 
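The value built here is the unconditional Thumb B offset, SignExtend(imm11:'0', 32); the printed target then adds the PC, which in Thumb reads as the branch address plus 4. A self-contained rendering of the offset decode:

    #include <cassert>
    #include <cstdint>

    // Offset encoded by the imm11 field of a Thumb B (encoding T2) instruction.
    static int32_t thumbBOffset(uint32_t imm11) {
      uint32_t v = (imm11 & 0x7FFu) << 1; // imm11:'0', a 12-bit quantity
      if (v & 0x800u)                     // sign bit of the 12-bit field
        v |= ~0xFFFu;                     // sign-extend to 32 bits
      return (int32_t)v;
    }

    int main() {
      assert(thumbBOffset(0x001) == 2);
      assert(thumbBOffset(0x7FF) == -2); // all-ones imm11 branches backwards
      // branch target = address_of_B + 4 + thumbBOffset(imm11)
      return 0;
    }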
- MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1) + 4)); + MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1))); NumOpsAdded = 1; @@ -1129,8 +1146,12 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, // t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); + unsigned Rn = decodeRn(insn); + if (Rn == 15) { + DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); + return false; + } + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn))); NumOpsAdded = 1; return true; } @@ -1149,7 +1170,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD) && "Unexpected opcode"); - assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); + assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4"); NumOpsAdded = 0; @@ -1203,45 +1224,79 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID + && OpInfo[0].RegClass > 0 + && OpInfo[1].RegClass > 0 && "Expect >=2 operands and first two as reg operands"); bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH); bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX); bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD); + unsigned Rt = decodeRd(insn); + unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX. + unsigned Rd = decodeRm(insn); + unsigned Rn = decodeRn(insn); + + // Some sanity checking first. + if (isStore) { + // if d == n || d == t then UNPREDICTABLE + // if d == n || d == t || d == t2 then UNPREDICTABLE + if (isDW) { + if (Rd == Rn || Rd == Rt || Rd == Rt2) { + DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n"); + return false; + } + } else { + if (isSW) { + if (Rt2 == Rn || Rt2 == Rt) { + DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n"); + return false; + } + } else { + if (Rd == Rn || Rd == Rt) { + DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n"); + return false; + } + } + } + } else { + // Load + // A8.6.71 LDREXD + // if t == t2 then UNPREDICTABLE + if (isDW && Rt == Rt2) { + DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); + return false; + } + } + // Add the destination operand for store. if (isStore) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - isSW ? decodeRs(insn) : decodeRm(insn)))); + getRegisterEnum(B, OpInfo[OpIdx].RegClass, + isSW ? Rt2 : Rd))); ++OpIdx; } // Source operand for store and destination operand for load. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, + Rt))); ++OpIdx; // Thumb2 doubleword complication: with an extra source/destination operand. if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRs(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, + Rt2))); ++OpIdx; } // Finally add the pointer operand. 
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, + Rn))); ++OpIdx; return true; } -// LLVM, as of Jan-05-2010, does not output <Rt2>, i.e., Rs, in the asm. -// Whereas the ARM Arch. Manual does not require that t2 = t+1 like in ARM ISA. -// // t2LDRDi8: Rd Rs Rn imm8s4 (offset mode) // t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version) // t2STRDi8: Rd Rs Rn imm8s4 (offset mode) @@ -1255,18 +1310,50 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, if (!OpInfo) return false; assert(NumOps >= 4 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass == ARM::GPRRegClassID + && OpInfo[0].RegClass > 0 + && OpInfo[0].RegClass == OpInfo[1].RegClass + && OpInfo[2].RegClass > 0 && OpInfo[3].RegClass < 0 && "Expect >= 4 operands and first 3 as reg operands"); + // Thumb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1). + unsigned Rt = decodeRd(insn); + unsigned Rt2 = decodeRs(insn); + unsigned Rn = decodeRn(insn); + + // Some sanity checking first. + + // A8.6.67 LDRD (literal) has its W bit as (0). + if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) { + if (Rn == 15 && slice(insn, 21, 21) != 0) + return false; + } else { + // For Dual Store, PC cannot be used as the base register. + if (Rn == 15) { + DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); + return false; + } + } + if (Rt == Rt2) { + DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); + return false; + } + if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) { + if (Rn == Rt || Rn == Rt2) { + DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n"); + return false; + } + } + // Add the <Rt> <Rt2> operands. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + unsigned RegClassPair = OpInfo[0].RegClass; + unsigned RegClassBase = OpInfo[2].RegClass; + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase, decodeRn(insn)))); // Finally add (+/-)imm8*4, depending on the U bit. @@ -1394,9 +1481,12 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - if (Thumb2ShiftOpcode(Opcode)) - MI.addOperand(MCOperand::CreateImm(getShiftAmtBits(insn))); - else { + if (Thumb2ShiftOpcode(Opcode)) { + unsigned Imm = getShiftAmtBits(insn); + ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4)); + getImmShiftSE(ShOp, Imm); + MI.addOperand(MCOperand::CreateImm(Imm)); + } else { // Build the constant shift specifier operand.
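Both the getImmShiftSE fix-up above and the constant shift specifier built below implement the A8.4.1 DecodeImmShift rule, where an immediate field of 0 means a shift of 32 for LSR/ASR and RRX for the ROR encoding. A compact standalone rendering of that table:

    #include <cassert>

    enum ShiftKind { LSL, LSR, ASR, ROR, RRX };

    // A8.4.1 DecodeImmShift: map (type, imm5) to (kind, amount).
    static ShiftKind decodeImmShift(unsigned type, unsigned imm5,
                                    unsigned &amount) {
      switch (type & 3) {
      case 0: amount = imm5; return LSL;
      case 1: amount = imm5 ? imm5 : 32; return LSR;
      case 2: amount = imm5 ? imm5 : 32; return ASR;
      default:
        if (imm5 == 0) { amount = 1; return RRX; }
        amount = imm5;  return ROR;
      }
    }

    int main() {
      unsigned amt;
      assert(decodeImmShift(1, 0, amt) == LSR && amt == 32);
      assert(decodeImmShift(3, 0, amt) == RRX && amt == 1);
      assert(decodeImmShift(0, 5, amt) == LSL && amt == 5);
      return 0;
    }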
unsigned bits2 = getShiftTypeBits(insn); unsigned imm5 = getShiftAmtBits(insn); @@ -1421,7 +1511,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const TargetInstrDesc &TID = ARMInsts[Opcode]; + const TargetOperandInfo *OpInfo = TID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1448,8 +1539,15 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); return false; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, - decodeRn(insn)))); + int Idx; + if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + // The reg operand is tied to the first reg operand. + MI.addOperand(MI.getOperand(Idx)); + } else { + // Add second reg operand. + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, + decodeRn(insn)))); + } ++OpIdx; } @@ -1518,7 +1616,7 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, // o t2ADDri12, t2SUBri12: Rs Rn imm12 // o t2LEApcrel (ADR): Rs imm12 // o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm -// o t2BFI (BFI) (Currently not defined in LLVM as of Jan-07-2010) +// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm // o t2MOVi16: Rs imm16 // o t2MOVTi16: Rs imm16 // o t2SBFX (SBFX): Rs Rn lsb width @@ -1579,9 +1677,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12 || Opcode == ARM::t2LEApcrel) MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); - else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) - MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { + else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) { + if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI)) + MI.addOperand(MCOperand::CreateImm(getImm16(insn))); + } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { uint32_t mask = 0; if (getBitfieldInvMask(insn, mask)) MI.addOperand(MCOperand::CreateImm(mask)); @@ -1625,8 +1724,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // A8.6.26 // t2BXJ -> Rn // -// Miscellaneous control: t2DMBsy (and its t2DMB variants), -// t2DSBsy (and its t2DSB varianst), t2ISBsy, t2CLREX +// Miscellaneous control: // -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) // // Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV @@ -1643,6 +1741,22 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, if (NumOps == 0) return true; + if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) { + // Inst{3-0} encodes the memory barrier option for the variants. + unsigned opt = slice(insn, 3, 0); + switch (opt) { + case ARM_MB::SY: case ARM_MB::ST: + case ARM_MB::ISH: case ARM_MB::ISHST: + case ARM_MB::NSH: case ARM_MB::NSHST: + case ARM_MB::OSH: case ARM_MB::OSHST: + MI.addOperand(MCOperand::CreateImm(opt)); + NumOpsAdded = 1; + return true; + default: + return false; + } + } + if (t2MiscCtrlInstr(insn)) return true; @@ -1719,6 +1833,17 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, return true; } + // Some instructions have predicate operands first before the immediate. 
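A "predicate operand" in these builders is really a pair of MCOperands: a condition-code immediate followed by the CPSR register (reg0 when unpredicated). A hedged sketch of what DoPredicateOperands appends; the helper name is local, 14 is ARMCC::AL, and the CPSR register number is target-defined, so it is passed in:

    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    // Append the two-operand predicate pair used throughout these builders.
    static void addPredicatePair(MCInst &MI, unsigned CPSRReg,
                                 int64_t Cond = 14 /* ARMCC::AL */) {
      MI.addOperand(MCOperand::CreateImm(Cond));    // condition code
      MI.addOperand(MCOperand::CreateReg(CPSRReg)); // CPSR, or 0 if none
    }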
+ if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) { + // Handling the two predicate operands before the imm operand. + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + NumOpsAdded += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } + } + // Add the imm operand. int Offset = 0; @@ -1739,13 +1864,12 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, Offset = decodeImm32_BLX(insn); break; } - // When executing a Thumb instruction, PC reads as the address of the current - // instruction plus 4. The assembler subtracts 4 from the difference between - // the branch instruction and the target address, disassembler has to add 4 to - // to compensate. - MI.addOperand(MCOperand::CreateImm(Offset + 4)); - NumOpsAdded = 1; + if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Offset)); + + // This is an increment as some predicate operands may have been added first. + NumOpsAdded += 1; return true; } @@ -1787,7 +1911,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRn(insn)))); ++OpIdx; - if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { + if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); int Offset = 0; - if (slice(insn, 19, 16) == 0xFF) { - bool Negative = slice(insn, 23, 23) == 0; - unsigned Imm12 = getImm12(insn); - Offset = Negative ? -1 - Imm12 : 1 * Imm12; - } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 || - Opcode == ARM::t2PLIi8) { + if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 || + Opcode == ARM::t2PLIi8) { // A8.6.117 Encoding T2: add = FALSE unsigned Imm8 = getImm8(insn); - Offset = -1 - Imm8; - } else // The i12 forms. See, for example, A8.6.117 Encoding T1. + Offset = -1 * Imm8; + } else { + // The i12 forms. See, for example, A8.6.117 Encoding T1. + // Note that currently t2PLDi12 also handles the previously named t2PLDpci + // opcode; that's why we use decodeImm12(insn) which returns +/- imm12. Offset = decodeImm12(insn); + } MI.addOperand(MCOperand::CreateImm(Offset)); } ++OpIdx; @@ -1820,6 +1944,87 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load, + unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) { + + // Inst{22-21} encodes the data item transferred for load/store. + // For single word, it is encoded as 0b10.
+ bool Word = (slice(insn, 22, 21) == 2); + bool Half = (slice(insn, 22, 21) == 1); + bool Byte = (slice(insn, 22, 21) == 0); + + if (UseRm && BadReg(R2)) { + DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n"); + return true; + } + + if (Load) { + if (!Word && R0 == 13) { + DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n"); + return true; + } + if (Byte) { + if (WB && R0 == 15 && slice(insn, 10, 8) == 3) { + // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0) + DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n"); + return true; + } + } + // A6.3.8 Load halfword, memory hints + if (Half) { + if (WB) { + if (R0 == R1) { + // A8.6.82 LDRSH (immediate) Encoding T2 + DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n"); + return true; + } + if (R0 == 15 && slice(insn, 10, 8) == 3) { + // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0) + DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n"); + return true; + } + } else { + if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) { + if (R0 == 15 && slice(insn, 10, 8) == 4) { + // A8.6.82 LDRSH (immediate) Encoding T2 + DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE" + << " \"Unallocated memory hints\"\n"); + return true; + } + } else { + if (R0 == 15) { + // A8.6.82 LDRSH (immediate) Encoding T1 + DEBUG(errs() << "if Rt == '1111' then SEE" + << " \"Unallocated memory hints\"\n"); + return true; + } + } + } + } + } else { + if (WB && R0 == R1) { + DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); + return true; + } + if ((WB && R0 == 15) || (!WB && R1 == 15)) { + DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n"); + return true; + } + if (Word) { + if ((WB && R1 == 15) || (!WB && R0 == 15)) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) { + DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n"); + return true; + } + } + } + return false; +} + // A6.3.10 Store single data item // A6.3.9 Load byte, memory hints // A6.3.8 Load halfword, memory hints @@ -1865,16 +2070,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, OpIdx = 0; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass > 0 && + OpInfo[1].RegClass > 0 && "Expect >= 3 operands and first two as reg operands"); - bool ThreeReg = (OpInfo[2].RegClass == ARM::GPRRegClassID); + bool ThreeReg = (OpInfo[2].RegClass > 0); bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1; bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td // Build the register operands, followed by the immediate. - unsigned R0, R1, R2 = 0; + unsigned R0 = 0, R1 = 0, R2 = 0; unsigned Rd = decodeRd(insn); int Imm = 0; @@ -1905,19 +2110,24 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, Imm = decodeImm8(insn); } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, R0))); ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, R1))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + // This could be an offset register or a TIED_TO register. 
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, R2))); ++OpIdx; } + if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg & !TIED_TO, + TIED_TO)) + return false; + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1947,25 +2157,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::rGPRRegClassID && - OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[0].RegClass > 0 && + OpInfo[1].RegClass > 0 && "Expect >= 2 operands and first two as reg operands"); // Build the register operands, followed by the optional rotation amount. - bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID; + bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRm(insn)))); ++OpIdx; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 1499da0..fc2aa75 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -29,6 +29,9 @@ StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); } +StringRef ARMInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); +} void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { unsigned Opcode = MI->getOpcode(); @@ -133,9 +136,10 @@ static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream, unsigned Rot = ARM_AM::getSOImmValRot(V); // Print low-level immediate formation info, per - // A5.1.3: "Data-processing operands - Immediate". + // A5.2.3: Data-processing (immediate), and + // A5.2.4: Modified immediate constants in ARM instructions if (Rot) { - O << "#" << Imm << ", " << Rot; + O << "#" << Imm << ", #" << Rot; // Pretty printed version. if (CommentStream) *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n"; @@ -178,18 +182,16 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, } } +//===--------------------------------------------------------------------===// +// Addressing Mode #2 +//===--------------------------------------------------------------------===// -void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
- printOperand(MI, Op, O); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { @@ -212,6 +214,50 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, O << "]"; } +void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + O << "[" << getRegisterName(MO1.getReg()) << "], "; + + if (!MO2.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm()); + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ImmOffs; + return; + } + + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << getRegisterName(MO2.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; +} + +void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, O); + return; + } + + const MCOperand &MO3 = MI->getOperand(Op+2); + unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm()); + + if (IdxMode == ARMII::IndexModePost) { + printAM2PostIndexOp(MI, Op, O); + return; + } + printAM2PreOrOffsetIndexOp(MI, Op, O); +} + void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -235,11 +281,35 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, << " #" << ShImm; } -void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); +//===--------------------------------------------------------------------===// +// Addressing Mode #3 +//===--------------------------------------------------------------------===// + +void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + O << "[" << getRegisterName(MO1.getReg()) << "], "; + + if (MO2.getReg()) { + O << (char)ARM_AM::getAM3Op(MO3.getImm()) + << getRegisterName(MO2.getReg()); + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ImmOffs; +} + +void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); O << '[' << getRegisterName(MO1.getReg()); @@ -256,6 +326,18 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum, O << ']'; } +void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO3 = MI->getOperand(Op+2); + unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); + + if (IdxMode == ARMII::IndexModePost) { + printAM3PostIndexOp(MI, Op, O); + return; + } + printAM3PreOrOffsetIndexOp(MI, Op, O); +} + void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -314,6 +396,12 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, 
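The printSOImm change above makes the rotation a proper immediate ("#255, #8" rather than "#255, 8") and keeps emitting the decoded constant as a comment. A standalone check of the decoding, with rotr32 reimplemented here from its usual definition rather than taken from ARMAddressingModes:

    #include <cstdint>
    #include <cstdio>

    static uint32_t rotr32(uint32_t V, unsigned R) {
      R &= 31;
      return R ? (V >> R) | (V << (32 - R)) : V;
    }

    int main() {
      unsigned Imm = 255, Rot = 8;   // would print as "#255, #8"
      // The pretty-printed comment shows the materialized value:
      std::printf("#%u, #%u  @ 0x%08X\n", Imm, Rot, rotr32(Imm, Rot));
      // 255 rotated right by 8 is 0xFF000000.
      return 0;
    }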
unsigned OpNum, O << "]"; } +void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + O << "[" << getRegisterName(MO1.getReg()) << "]"; +} + void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -414,16 +502,6 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, } } -void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - O << '#'; - if (Op.getImm() < 0) - O << '-' << (-Op.getImm() - 1); - else - O << Op.getImm(); -} - void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 679d313..b3ac03a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -17,14 +17,18 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { - class MCOperand; + +class MCOperand; +class TargetMachine; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} + ARMInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); virtual StringRef getOpcodeName(unsigned Opcode) const; + virtual StringRef getRegName(unsigned RegNo) const; static const char *getInstructionName(unsigned Opcode); @@ -38,15 +42,25 @@ public: void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -87,9 +101,7 @@ public: void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git 
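For reference, the syntactic difference the new pre/post split in the addressing-mode printers has to get right; the strings below are ordinary ARM assembly written out by hand, not printer output.

    #include <cstdio>

    int main() {
      // Offset / pre-indexed form: the offset stays inside the brackets.
      std::printf("ldr r0, [r1, r2, lsl #2]\n");
      // Post-indexed form: the brackets close after the base register,
      // and the base is updated after the access.
      std::printf("ldr r0, [r1], r2, lsl #2\n");
      return 0;
    }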
a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 9a27e2f..f6d0242 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -15,11 +15,13 @@ #define DEBUG_TYPE "mlx-expansion" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -49,15 +51,17 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; + bool isA9; unsigned MIIdx; MachineInstr* LastMIs[4]; + SmallPtrSet<MachineInstr*, 4> IgnoreStall; void clearStack(); void pushStack(MachineInstr *MI); MachineInstr *getAccDefMI(MachineInstr *MI) const; unsigned getDefReg(MachineInstr *MI) const; bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; - bool FindMLxHazard(MachineInstr *MI) const; + bool FindMLxHazard(MachineInstr *MI); void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, unsigned MulOpc, unsigned AddSubOpc, bool NegAcc, bool HasLane); @@ -146,7 +150,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { } -bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { +bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { if (NumExpand >= ExpandLimit) return false; @@ -154,7 +158,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { return true; MachineInstr *DefMI = getAccDefMI(MI); - if (TII->isFpMLxInstruction(DefMI->getOpcode())) + if (TII->isFpMLxInstruction(DefMI->getOpcode())) { // r0 = vmla // r3 = vmla r0, r1, r2 // takes 16 - 17 cycles @@ -163,24 +167,33 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { // r4 = vmul r1, r2 // r3 = vadd r0, r4 // takes about 14 - 15 cycles even with vmul stalling for 4 cycles. + IgnoreStall.insert(DefMI); return true; + } + + if (IgnoreStall.count(MI)) + return false; // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall // preserves the in-order retirement of the instructions. // Look at the next few instructions, if *most* of them can cause hazards, // then the scheduler can't *fix* this, we'd better break up the VMLA. + unsigned Limit1 = isA9 ? 1 : 4; + unsigned Limit2 = isA9 ? 1 : 4; for (unsigned i = 1; i <= 4; ++i) { int Idx = ((int)MIIdx - i + 4) % 4; MachineInstr *NextMI = LastMIs[Idx]; if (!NextMI) continue; - if (TII->canCauseFpMLxStall(NextMI->getOpcode())) - return true; + if (TII->canCauseFpMLxStall(NextMI->getOpcode())) { + if (i <= Limit1) + return true; + } // Look for VMLx RAW hazard. 
- if (hasRAWHazard(getDefReg(MI), NextMI)) + if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI)) return true; } @@ -248,6 +261,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { bool Changed = false; clearStack(); + IgnoreStall.clear(); unsigned Skip = 0; MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); @@ -299,6 +313,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); TRI = Fn.getTarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); + const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); + isA9 = STI->isCortexA9(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 9fc3fb9..8ba9a27 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -657,3 +657,27 @@ Note that both "tst" and "moveq" are redundant. //===---------------------------------------------------------------------===// +When loading immediate constants with movt/movw, if there are multiple +constants needed with the same low 16 bits, and those values are not live at +the same time, it would be possible to use a single movw instruction, followed +by multiple movt instructions to rewrite the high bits to different values. +For example: + + volatile store i32 -1, i32* inttoptr (i32 1342210076 to i32*), align 4, + !tbaa +!0 + volatile store i32 -1, i32* inttoptr (i32 1342341148 to i32*), align 4, + !tbaa +!0 + +is compiled and optimized to: + + movw r0, #32796 + mov.w r1, #-1 + movt r0, #20480 + str r1, [r0] + movw r0, #32796 @ <= this MOVW is not needed, value is there already + movt r0, #20482 + str r1, [r0] + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 233e165..dee3d27 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -34,13 +34,14 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const return !MF.getFrameInfo()->hasVarSizedObjects(); } -static void emitSPUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo &TII, DebugLoc dl, - const Thumb1RegisterInfo &MRI, - int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); +static void +emitSPUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo &TII, DebugLoc dl, + const Thumb1RegisterInfo &MRI, + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, + MRI, MIFlags); } void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { @@ -70,11 +71,13 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, + MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + MachineInstr::FrameSetup); return; } @@ -131,7 +134,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust FP so it point to the stack slot that contains the previous FP. 
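A toy model of the hazard-window scan in FindMLxHazard above, with the new per-subtarget limits: on Cortex-A9 only the immediately preceding instruction (limit 1) may trigger the heuristic, elsewhere the whole four-entry window does. Types are mocked up; the real pass keeps two limits, one for stall-prone opcodes and one for RAW hazards, both 1 on A9 and 4 otherwise.

    struct Instr { bool CanStall; bool RawHazard; };

    // LastMIs is a four-entry circular buffer; MIIdx is the newest slot.
    static bool findHazard(Instr *LastMIs[4], int MIIdx, bool isA9) {
      unsigned Limit1 = isA9 ? 1 : 4;  // window for stall-prone opcodes
      unsigned Limit2 = isA9 ? 1 : 4;  // window for RAW hazards
      for (unsigned i = 1; i <= 4; ++i) {
        int Idx = (MIIdx - (int)i + 4) % 4;
        Instr *NextMI = LastMIs[Idx];
        if (!NextMI) continue;
        if (i <= Limit1 && NextMI->CanStall) return true;
        if (i <= Limit2 && NextMI->RawHazard) return true;
      }
      return false;
    }

    int main() {
      Instr Stall = {true, false};
      Instr *Buf[4] = {&Stall, 0, 0, 0};
      // On A9 the stall-prone neighbor is adjacent (i == 1), so it counts.
      return findHazard(Buf, 1, /*isA9=*/true) ? 0 : 1;
    }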
if (hasFP(MF)) { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); + .addFrameIndex(FramePtrSpillFI).addImm(0) + .setMIFlags(MachineInstr::FrameSetup); if (NumBytes > 7) // If offset is > 7 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. @@ -140,7 +144,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes) // Insert it after all the callee-save spills. - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + MachineInstr::FrameSetup); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -156,7 +161,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // to reference locals. if (RegInfo->hasBasePointer(MF)) BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); - + // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. @@ -232,8 +237,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, if (NumBytes) { assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); - emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes, - TII, *RegInfo, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, + TII, *RegInfo); BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(ARM::R4); } else @@ -307,6 +312,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MIB.addReg(Reg, getKillRegState(isKill)); } + MIB.setMIFlags(MachineInstr::FrameSetup); return true; } diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index c592e12..bcfc516 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #ifndef __THUMB_FRAMEINFO_H_ -#define __THUMM_FRAMEINFO_H_ +#define __THUMB_FRAMEINFO_H_ #include "ARM.h" #include "ARMFrameLowering.h" diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index f62a13e..33cefb6 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -31,8 +31,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -48,15 +46,29 @@ Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, : ARMBaseRegisterInfo(tii, sti) { } +const TargetRegisterClass* +Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) + const { + if (RC == ARM::tGPRRegisterClass || RC->hasSuperClass(ARM::tGPRRegisterClass)) + return ARM::tGPRRegisterClass; + return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC); +} + +const TargetRegisterClass * +Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const { + return ARM::tGPRRegisterClass; +} + /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. 
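The recurring setMIFlags(MachineInstr::FrameSetup) calls in this file all serve one purpose: tag everything the prologue emits so later consumers (debug info, frame analysis) can tell prologue code from ordinary code. A self-contained mock of the flag-threading pattern, with invented types standing in for MachineInstr and the emitters:

    #include <vector>
    #include <cstdint>

    enum MIFlagBits : uint8_t { NoFlags = 0, FrameSetup = 1 << 0 };
    struct MockMI { const char *Mnemonic; uint8_t Flags; };

    // Emitters now accept (and default) a flags argument and stamp it on
    // every instruction they create.
    static void emitSPUpdate(std::vector<MockMI> &MBB, int Bytes,
                             uint8_t Flags = NoFlags) {
      MBB.push_back({Bytes < 0 ? "sub sp, sp, #imm" : "add sp, sp, #imm", Flags});
    }

    int main() {
      std::vector<MockMI> MBB;
      emitSPUpdate(MBB, -16, FrameSetup);  // prologue: tagged
      emitSPUpdate(MBB, 16);               // epilogue: left untagged
      return (MBB[0].Flags & FrameSetup) && !(MBB[1].Flags & FrameSetup) ? 0 : 1;
    }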
-void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - DebugLoc dl, - unsigned DestReg, unsigned SubIdx, - int Val, - ARMCC::CondCodes Pred, - unsigned PredReg) const { +void +Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, + int Val, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( @@ -64,8 +76,9 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci)) - .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg) + .setMIFlags(MIFlags); } @@ -76,11 +89,12 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, static void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, bool CanChangeCC, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl) { + unsigned MIFlags = MachineInstr::NoFlags) { MachineFunction &MF = *MBB.getParent(); bool isHigh = !isARMLowRegister(DestReg) || (BaseReg != 0 && !isARMLowRegister(BaseReg)); @@ -101,14 +115,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, if (NumBytes <= 255 && NumBytes >= 0) AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) - .addImm(NumBytes); + .addImm(NumBytes).setMIFlags(MIFlags); else if (NumBytes < 0 && NumBytes >= -255) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) - .addImm(NumBytes); + .addImm(NumBytes).setMIFlags(MIFlags); AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) - .addReg(LdReg, RegState::Kill); + .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags); } else - MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes); + MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, + ARMCC::AL, 0, MIFlags); // Emit add / sub. int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); @@ -151,10 +166,11 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, /// a destreg = basereg + immediate in Thumb code. void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl) { + unsigned MIFlags) { bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; @@ -211,8 +227,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, if (NumMIs > Threshold) { // This will expand into too many instructions. Load the immediate from a // constpool entry. - emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, - MRI, dl); + emitThumbRegPlusImmInReg(MBB, MBBI, dl, + DestReg, BaseReg, NumBytes, true, + TII, MRI, MIFlags); return; } @@ -224,11 +241,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Bytes -= ThisVal; const TargetInstrDesc &TID = TII.get(isSub ? 
ARM::tSUBi3 : ARM::tADDi3); const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else { BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill); + .addReg(BaseReg, RegState::Kill) + .setMIFlags(MIFlags); } BaseReg = DestReg; } @@ -243,9 +261,10 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (NeedCC) MIB = AddDefaultT1CC(MIB); - MIB .addReg(DestReg).addImm(ThisVal); + MIB.addReg(DestReg).addImm(ThisVal); if (NeedPred) MIB = AddDefaultPred(MIB); + MIB.setMIFlags(MIFlags); } else { bool isKill = BaseReg != ARM::SP; @@ -255,8 +274,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); if (NeedPred) MIB = AddDefaultPred(MIB); - BaseReg = DestReg; + MIB.setMIFlags(MIFlags); + BaseReg = DestReg; if (Opc == ARM::tADDrSPi) { // r4 = add sp, imm // r4 = add r4, imm @@ -274,7 +294,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, const TargetInstrDesc &TID = TII.get(ExtraOpc); AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3)); + .addImm(((unsigned)NumBytes) & 3) + .setMIFlags(MIFlags)); } } @@ -283,8 +304,8 @@ static void emitSPUpdate(MachineBasicBlock &MBB, const TargetInstrInfo &TII, DebugLoc dl, const Thumb1RegisterInfo &MRI, int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, + MRI); } void Thumb1RegisterInfo:: @@ -337,7 +358,7 @@ static void emitThumbConstant(MachineBasicBlock &MBB, DestReg)) .addImm(ThisVal)); if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); if (isSub) { const TargetInstrDesc &TID = TII.get(ARM::tRSB); AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) @@ -430,8 +451,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // MI would expand into a large number of instructions. Don't try to // simplify the immediate. 
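The loop being reshuffled above implements immediate chunking: a stack adjustment too large for one Thumb1 instruction is split into a sequence of encodable adds or subs. A deliberately simplified standalone model using only the 8-bit immediate form; the real code also uses 3-bit forms, SP-scaled variants, and a constant-pool fallback.

    #include <cstdio>

    int main() {
      int NumBytes = -1000;                 // e.g. a frame adjustment
      bool isSub = NumBytes < 0;
      unsigned Bytes = isSub ? (unsigned)-NumBytes : (unsigned)NumBytes;
      while (Bytes) {
        unsigned ThisVal = Bytes > 255 ? 255 : Bytes;  // tADDi8/tSUBi8 range
        Bytes -= ThisVal;
        std::printf("%s r4, #%u\n", isSub ? "subs" : "adds", ThisVal);
      }
      return 0;
    }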
if (NumMIs > 2) { - emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, - *this, dl); + emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, + *this); MBB.erase(II); return true; } @@ -450,8 +471,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, } Offset = (Offset - Mask * Scale); MachineBasicBlock::iterator NII = llvm::next(II); - emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, - *this, dl); + emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII, + *this); } else { // Translate r0 = add sp, -imm to // r0 = -imm (this is then translated into a series of instructons) @@ -645,15 +666,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, bool UseRR = false; if (Opcode == ARM::tRestore) { if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); + emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg, + Offset, false, TII, *this); else { emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); UseRR = true; } } else { - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); + emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII, + *this); } MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi)); @@ -668,15 +689,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (Opcode == ARM::tSpill) { if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg, - Offset, false, TII, *this, dl); + emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg, + Offset, false, TII, *this); else { emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); UseRR = true; } } else - emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII, - *this, dl); + emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII, + *this); MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi)); MI.getOperand(i).ChangeToRegister(VReg, false, false, true); if (UseRR) diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 8a87cc5..9060e59 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -28,6 +28,11 @@ struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { public: Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. void emitLoadConstPool(MachineBasicBlock &MBB, @@ -35,7 +40,8 @@ public: DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) const; + unsigned PredReg = 0, + unsigned MIFlags = MachineInstr::NoFlags) const; /// Code Generation virtual methods... 
void eliminateCallFramePseudoInstr(MachineFunction &MF, diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 9b1073b..d169dbb 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -184,7 +184,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII) { + const ARMBaseInstrInfo &TII, unsigned MIFlags) { bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -198,14 +198,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, // Use a movw to materialize the 16-bit constant. BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg) .addImm(NumBytes) - .addImm((unsigned)Pred).addReg(PredReg); + .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags); Fits = true; } else if ((NumBytes & 0xffff) == 0) { // Use a movt to materialize the 32-bit constant. BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg) .addReg(DestReg) .addImm(NumBytes >> 16) - .addImm((unsigned)Pred).addReg(PredReg); + .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags); Fits = true; } @@ -214,12 +214,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg) .addReg(BaseReg, RegState::Kill) .addReg(DestReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); } else { BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg) .addReg(DestReg, RegState::Kill) .addReg(BaseReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); } return; } @@ -230,7 +232,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = 0; if (DestReg == ARM::SP && BaseReg != ARM::SP) { // mov sp, rn. Note t2MOVr cannot be used. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg) + .addReg(BaseReg).setMIFlags(MIFlags); BaseReg = ARM::SP; continue; } @@ -243,7 +246,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; // FIXME: Fix Thumb1 immediate encoding. 
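A sketch of the selection heuristic emitT2RegPlusImmediate applies above before falling back to chunking: a value that fits in 16 bits gets a single MOVW, a value whose low half is clear gets a MOVT, and anything else is split into add/sub pieces. This restates only the dispatch, not the full register bookkeeping around it.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t Vals[] = { 0x1234u, 0x56780000u, 0x12345678u };
      for (unsigned i = 0; i != 3; ++i) {
        uint32_t V = Vals[i];
        if (V < 0x10000)
          std::printf("movw rd, #%u\n", V);         // fits in 16 bits
        else if ((V & 0xFFFF) == 0)
          std::printf("movt rd, #%u\n", V >> 16);   // only the high half is set
        else
          std::printf("split %#x into add/sub pieces\n", V);
      }
      return 0;
    }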
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg).addImm(ThisVal/4); + .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags); NumBytes = 0; continue; } @@ -283,7 +286,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) .addReg(BaseReg, RegState::Kill) - .addImm(ThisVal)); + .addImm(ThisVal)).setMIFlags(MIFlags); if (HasCCOut) AddDefaultCC(MIB); diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 099b8f7..355c3bf 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -13,26 +13,15 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" -#include "ARMBaseInstrInfo.h" -#include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "Thumb2InstrInfo.h" #include "Thumb2RegisterInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" using namespace llvm; Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, @@ -42,13 +31,14 @@ Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. 
-void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - DebugLoc dl, - unsigned DestReg, unsigned SubIdx, - int Val, - ARMCC::CondCodes Pred, - unsigned PredReg) const { +void +Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, + int Val, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( @@ -57,5 +47,6 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci)) .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0); + .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0) + .setMIFlags(MIFlags); } diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index b3cf2e5..824378a 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -35,7 +35,8 @@ public: DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) const; + unsigned PredReg = 0, + unsigned MIFlags = MachineInstr::NoFlags) const; }; } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index cc8f61c..ce2e966 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -12,6 +12,7 @@ #include "ARMAddressingModes.h" #include "ARMBaseRegisterInfo.h" #include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -49,82 +50,86 @@ namespace { // 1 - No cc field. // 2 - Always set CPSR. unsigned PredCC2 : 2; + unsigned PartFlag : 1; // 16-bit instruction does partial flag update unsigned Special : 1; // Needs to be dealt with specially }; static const ReduceEntry ReduceTable[] = { - // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S - { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 }, - { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 }, + // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S + { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,0 }, + { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 }, // Note: immediate scale is 4. 
- { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 1 }, - { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 }, - { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 }, - { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 }, - { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0,1 }, + { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 }, + { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 }, + { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, + { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 }, //FIXME: Disable CMN, as CCodes are backwards from compare expectations - //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 }, - { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 }, - { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 1 }, - { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 }, + //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 }, + { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 }, // FIXME: adr.n immediate offset must be multiple of 4. - //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 }, - { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, - { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, - { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 }, + //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 }, + { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, + { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 }, + // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less + // likely to cause issue in the loop. As a size / performance workaround, + // they are not marked as such. + { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 }, // FIXME: Do we need the 16-bit 'S' variant? 
- { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, - { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, - { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0 }, - { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, - { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 }, - { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 }, - { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 }, - { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 }, - { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 }, - { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 }, - { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0,0 }, + { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0,0 }, + { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0,0 }, + { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 }, + { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 }, + { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 }, + { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 }, + { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 }, + { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 }, + { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, // FIXME: Clean this up after splitting each Thumb load / store opcode // into multiple ones. 
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 }, - { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 }, - { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 1 }, - - { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 1 }, + { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 }, + { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + + { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 }, + { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 }, + { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 }, // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent - { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 }, + { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 }, }; class Thumb2SizeReduce : public MachineFunctionPass { @@ -133,6 +138,7 @@ namespace { Thumb2SizeReduce(); const Thumb2InstrInfo *TII; + const ARMSubtarget *STI; virtual bool runOnMachineFunction(MachineFunction &MF); @@ -144,6 +150,8 @@ namespace { /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. DenseMap<unsigned, unsigned> ReduceOpcodeMap; + bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use); + bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, bool is2Addr, ARMCC::CondCodes Pred, bool LiveCPSR, bool &HasCC, bool &CCDead); @@ -152,19 +160,20 @@ namespace { const ReduceEntry &Entry); bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, - const ReduceEntry &Entry, bool LiveCPSR); + const ReduceEntry &Entry, bool LiveCPSR, + MachineInstr *CPSRDef); /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address /// instruction. bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR); + bool LiveCPSR, MachineInstr *CPSRDef); /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit /// non-two-address instruction. 
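The table above just grew a column: PartFlag sits between the predicate/CC column and Special, marking wide opcodes whose 16-bit forms only partially update CPSR. A mock of the row layout for orientation; field names follow the diff, widths are guessed where the hunk does not show them.

    #include <cstdint>

    struct ReduceEntry {
      uint16_t WideOpc;        // wide (32-bit) opcode
      uint16_t NarrowOpc1;     // narrow, non-two-address form (0 if none)
      uint16_t NarrowOpc2;     // narrow, two-address form (0 if none)
      uint8_t  Imm1Limit;      // immediate bit-width for NarrowOpc1
      uint8_t  Imm2Limit;      // immediate bit-width for NarrowOpc2
      unsigned LowRegs1 : 1;   // NarrowOpc1 requires low registers
      unsigned LowRegs2 : 1;   // NarrowOpc2 requires low registers
      unsigned PredCC1  : 2;   // 0: any, 1: no cc field, 2: always sets CPSR
      unsigned PredCC2  : 2;
      unsigned PartFlag : 1;   // new: 16-bit form partially updates CPSR
      unsigned Special  : 1;   // needs special handling
    };

    // Shape of a row like "{ ARM::t2ANDrr, 0, ARM::tAND, 0,0, 0,1, 0,0, 1,0 }".
    static const ReduceEntry AndRow = { 100, 0, 200, 0, 0, 0, 1, 0, 0, 1, 0 };
    int main() { return AndRow.PartFlag ? 0 : 1; }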
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR); + bool LiveCPSR, MachineInstr *CPSRDef); /// ReduceMBB - Reduce width of instructions in the specified basic block. bool ReduceMBB(MachineBasicBlock &MBB); @@ -187,6 +196,52 @@ static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) { return false; } +/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations, +/// the 's' 16-bit instructions partially update CPSR. Abort the +/// transformation to avoid adding a false dependency on the last CPSR-setting +/// instruction, which hurts the out-of-order execution engine's ability +/// to do register renaming magic. +/// This function checks if there is a read-after-write dependency between the +/// last instruction that defines the CPSR and the current instruction. If there +/// is, then there is no harm done since the instruction cannot be retired +/// before the CPSR setting instruction anyway. +/// Note, we are not doing full dependency analysis here for the sake of compile +/// time. We're not looking for cases like: +/// r0 = muls ... +/// r1 = add.w r0, ... +/// ... +/// = mul.w r1 +/// In this case it would have been ok to narrow the mul.w to muls since there +/// is an indirect RAW dependency between the muls and the mul.w +bool +Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) { + if (!Def || !STI->avoidCPSRPartialUpdate()) + return false; + + SmallSet<unsigned, 2> Defs; + for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = Def->getOperand(i); + if (!MO.isReg() || MO.isUndef() || MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || Reg == ARM::CPSR) + continue; + Defs.insert(Reg); + } + + for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = Use->getOperand(i); + if (!MO.isReg() || MO.isUndef() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (Defs.count(Reg)) + return false; + } + + // No read-after-write dependency. The narrowing would add a false dependency. + return true; +} + bool Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, bool is2Addr, ARMCC::CondCodes Pred, @@ -410,7 +465,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, MIB.addOperand(MI->getOperand(OpNum)); // Transfer memoperands. - (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + // Transfer MI flags.
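canAddPseudoFlagDep above boils down to a def/use intersection: collect the registers the last CPSR-setter defines, and if the candidate reads none of them, narrowing would manufacture a dependency that does not otherwise exist. The same logic as a standalone model, with std::set standing in for SmallSet and plain register lists standing in for operand iteration:

    #include <cstddef>
    #include <set>
    #include <vector>

    static bool canAddPseudoFlagDep(const std::vector<unsigned> &DefRegs,
                                    const std::vector<unsigned> &UseRegs) {
      std::set<unsigned> Defs(DefRegs.begin(), DefRegs.end());
      for (std::size_t i = 0; i != UseRegs.size(); ++i)
        if (Defs.count(UseRegs[i]))
          return false;  // a real RAW dependency already orders the two
      return true;       // no dependency: the flag tie would be a false one
    }

    int main() {
      // "muls" defines r0; a later reader of r0 is already ordered after it,
      // so narrowing is harmless and the check returns false.
      bool Blocked = canAddPseudoFlagDep(std::vector<unsigned>(1, 0),
                                         std::vector<unsigned>(1, 0));
      return Blocked ? 1 : 0;
    }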
+ MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); @@ -422,7 +480,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR) { + bool LiveCPSR, MachineInstr *CPSRDef) { if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; @@ -440,12 +498,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, switch (Opc) { default: break; case ARM::t2ADDSri: { - if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) return true; // fallthrough } case ARM::t2ADDSrr: - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); } } break; @@ -453,13 +511,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, case ARM::t2RSBri: case ARM::t2RSBSri: if (MI->getOperand(2).getImm() == 0) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); break; case ARM::t2MOVi16: // Can convert only 'pure' immediate operands, not immediates obtained as // globals' addresses. if (MI->getOperand(1).isImm()) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); break; case ARM::t2CMPrr: { // Try to reduce to the lo-reg only version first. Why there are two @@ -468,17 +526,17 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, // are prioritized, but the table assumes a unique entry for each // source insn opcode. So for now, we hack a local entry record to use. static const ReduceEntry NarrowEntry = - { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 }; - if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR)) + { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; + if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef)) return true; - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); } case ARM::t2ADDrSPi: { static const ReduceEntry NarrowEntry = - { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 }; + { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 }; if (MI->getOperand(0).getReg() == ARM::SP) - return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR); - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); } } return false; @@ -487,7 +545,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR) { + bool LiveCPSR, MachineInstr *CPSRDef) { if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; @@ -542,6 +600,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) return false; + // Avoid adding a false dependency on partial flag update by some 16-bit + // instructions which have the 's' bit set. + if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && + canAddPseudoFlagDep(CPSRDef, MI)) + return false; + // Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); @@ -563,6 +627,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, MIB.addOperand(MI->getOperand(i)); } + // Transfer MI flags. + MIB.setMIFlags(MI->getFlags()); + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase(MI); @@ -573,7 +640,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR) { + bool LiveCPSR, MachineInstr *CPSRDef) { if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; @@ -626,6 +693,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) return false; + // Avoid adding a false dependency on partial flag update by some 16-bit + // instructions which have the 's' bit set. + if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && + canAddPseudoFlagDep(CPSRDef, MI)) + return false; + // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); @@ -663,6 +736,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (!TID.isPredicable() && NewTID.isPredicable()) AddDefaultPred(MIB); + // Transfer MI flags. + MIB.setMIFlags(MI->getFlags()); + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase(MI); @@ -670,7 +746,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, return true; } -static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) { +static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) { bool HasDef = false; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); @@ -678,6 +754,8 @@ static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) { continue; if (MO.getReg() != ARM::CPSR) continue; + + DefCPSR = true; if (!MO.isDead()) HasDef = true; } @@ -707,6 +785,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Yes, CPSR could be livein. bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); + MachineInstr *CPSRDef = 0; MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; @@ -722,7 +801,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { const ReduceEntry &Entry = ReduceTable[OPI->second]; // Ignore "special" cases for now. if (Entry.Special) { - if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) { + if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -731,7 +810,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } // Try to transform to a 16-bit two-address instruction. - if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) { + if (Entry.NarrowOpc2 && + ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -739,7 +819,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } // Try to transform to a 16-bit non-two-address instruction.
- if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) { + if (Entry.NarrowOpc1 && + ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -747,7 +828,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } ProcessNext: - LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR); + bool DefCPSR = false; + LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); + if (MI->getDesc().isCall()) + // Calls don't really set CPSR. + CPSRDef = 0; + else if (DefCPSR) + // This is the last CPSR defining instruction. + CPSRDef = MI; } return Modified; @@ -756,6 +844,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); + STI = &TM.getSubtarget<ARMSubtarget>(); bool Modified = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td index 4508eda..ae79c2e 100644 --- a/lib/Target/Alpha/Alpha.td +++ b/lib/Target/Alpha/Alpha.td @@ -21,7 +21,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true", - "Enable CIX extentions">; + "Enable CIX extensions">; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index c4f43ab..ee404f0 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -296,7 +296,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag in - // necessary since all emited instructions must be stuck together. + // necessary since all emitted instructions must be stuck together. 
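The ReduceMBB bookkeeping added above, isolated: the pass remembers the most recent CPSR-defining instruction while scanning the block, and a call resets the tracker since calls clobber CPSR via the ABI rather than computing flags. Mocked types again; this only models the tracking, not the narrowing decisions.

    #include <cstddef>
    #include <vector>

    struct Inst { bool IsCall; bool DefsCPSR; };

    static const Inst *lastCPSRDef(const std::vector<Inst> &MBB) {
      const Inst *CPSRDef = 0;
      for (std::size_t i = 0; i != MBB.size(); ++i) {
        if (MBB[i].IsCall)
          CPSRDef = 0;          // calls don't really set CPSR
        else if (MBB[i].DefsCPSR)
          CPSRDef = &MBB[i];    // last CPSR-defining instruction so far
      }
      return CPSRDef;
    }

    int main() {
      std::vector<Inst> MBB;
      Inst A = {false, true};   // e.g. an "adds" setting flags
      Inst B = {true, false};   // a call
      MBB.push_back(A);
      MBB.push_back(B);
      return lastCPSRDef(MBB) == 0 ? 0 : 1;  // the call cleared the tracker
    }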
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 099d715..b201712 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -1030,7 +1030,7 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP), //WMB Mfc 18.4400 Write memory barrier //MF_FPCR F-P 17.025 Move from FPCR //MT_FPCR F-P 17.024 Move to FPCR -//There are in the Multimedia extentions, so let's not use them yet +//There are in the Multimedia extensions, so let's not use them yet //def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum //def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum //def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt index 9ae1517..cc170e3 100644 --- a/lib/Target/Alpha/README.txt +++ b/lib/Target/Alpha/README.txt @@ -33,9 +33,9 @@ add crazy vector instructions (MVI): (MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word PKWB, UNPKBW pack/unpack word to byte PKLB UNPKBL pack/unpack long to byte -PERR pixel error (sum accross bytes of bytewise abs(i8v8 a - i8v8 b)) +PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b)) -cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extentions) +cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions) this has some good examples for other operations that can be synthesised well from these rather meager vector ops (such as saturating add). diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 7c80eec..1e1f8c9 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -345,7 +345,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 6c555a3..358d1b3 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -2440,24 +2440,6 @@ void CWriter::visitReturnInst(ReturnInst &I) { return; } - if (I.getNumOperands() > 1) { - Out << " {\n"; - Out << " "; - printType(Out, I.getParent()->getParent()->getReturnType()); - Out << " llvm_cbe_mrv_temp = {\n"; - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - Out << " "; - writeOperand(I.getOperand(i)); - if (i != e - 1) - Out << ","; - Out << "\n"; - } - Out << " };\n"; - Out << " return llvm_cbe_mrv_temp;\n"; - Out << " }\n"; - return; - } - Out << " return"; if (I.getNumOperands()) { Out << ' '; diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 5ef5716..f340edf 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -24,7 +24,7 @@ // 5. The code sequences for r64 and v2i64 are probably overly conservative, // compared to the code that gcc produces. // -// M00$E B!tes Kan be Pretty N@sTi!!!!! (appologies to Monty!) 
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!) //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // selb instruction definition for i64. Note that the selection mask is diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp index 4040461..fd96694 100644 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -182,6 +182,10 @@ namespace { printOp(MI->getOperand(OpNo), O); } + void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + printOp(MI->getOperand(OpNo), O); + } + void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { // Used to generate a ".-<target>", but it turns out that the assembler // really wants the target. @@ -279,6 +283,9 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { } O << *Mang->getSymbol(MO.getGlobal()); return; + case MachineOperand::MO_MCSymbol: + O << *(MO.getMCSymbol()); + return; default: O << "<unknown operand type: " << MO.getType() << ">"; return; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index d226156..9351ffd 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -321,12 +321,17 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, // These match the addr256k operand type: EVT OffsVT = MVT::i16; SDValue Zero = CurDAG->getTargetConstant(0, OffsVT); + int64_t val; switch (N.getOpcode()) { case ISD::Constant: + val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue(); + Base = CurDAG->getTargetConstant( val , MVT::i32); + Index = Zero; + return true; break; case ISD::ConstantPool: case ISD::GlobalAddress: - report_fatal_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered."); + report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered."); /*NOTREACHED*/ case ISD::TargetConstant: diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 743a4d7..8668da3 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -705,7 +705,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { offset )); - // Shift the low similarily + // Shift the low similarly // TODO: add SPUISD::SHL_BYTES low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index dd48d7b..cf883e2 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -183,14 +183,6 @@ namespace llvm { virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; - - /// After allocating this many registers, the allocator should feel - /// register pressure. The value is a somewhat random guess, based on the - /// number of non callee saved registers in the C calling convention. 
- virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC, - MachineFunction &MF) const{ - return 50; - } }; } diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td index 21bc275..bdbe255 100644 --- a/lib/Target/CellSPU/SPUInstrFormats.td +++ b/lib/Target/CellSPU/SPUInstrFormats.td @@ -296,3 +296,25 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> let Pattern = pattern; let Inst{31-0} = 0; } + +//===----------------------------------------------------------------------===// +// Branch hint formats +//===----------------------------------------------------------------------===// +// For hbrr and hbra +class HBI16Form<bits<7> opcode, dag IOL, string asmstr> + : Instruction { + field bits<32> Inst; + bits<16>i16; + bits<9>RO; + + let Namespace = "SPU"; + let InOperandList = IOL; + let OutOperandList = (outs); //no output + let AsmString = asmstr; + let Itinerary = BranchHints; + + let Inst{0-6} = opcode; + let Inst{7-8} = RO{8-7}; + let Inst{9-24} = i16; + let Inst{25-31} = RO{6-0}; +} diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index f9e6c72..080434d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCContext.h" using namespace llvm; @@ -281,9 +282,20 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, return true; } +// search MBB for branch hint labels and branch hint ops +static void removeHBR( MachineBasicBlock &MBB) { + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){ + if (I->getOpcode() == SPU::HBRA || + I->getOpcode() == SPU::HBR_LABEL){ + I=MBB.erase(I); + } + } +} + unsigned SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); + removeHBR(MBB); if (I == MBB.begin()) return 0; --I; @@ -314,6 +326,23 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } +/** Find the optimal position for a hint branch instruction in a basic block. + * This should take into account: + * -the branch hint delays + * -congestion of the memory bus + * -dual-issue scheduling (i.e. avoid insertion of nops) + * Current implementation is rather simplistic. + */ +static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB) +{ + MachineBasicBlock::iterator J = MBB.end(); + for( int i=0; i<8; i++) { + if( J == MBB.begin() ) return J; + J--; + } + return J; +} + unsigned SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -324,32 +353,61 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "SPU branch conditions have two components!"); + MachineInstrBuilder MIB; + //TODO: make a more accurate algorithm. + bool haveHBR = MBB.size()>8; + + removeHBR(MBB); + MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol(); + // Add a label just before the branch + if (haveHBR) + MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel); + // One-way branch.
if (FBB == 0) { if (Cond.empty()) { // Unconditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR)); + MIB = BuildMI(&MBB, DL, get(SPU::BR)); MIB.addMBB(TBB); DEBUG(errs() << "Inserted one-way uncond branch: "); DEBUG((*MIB).dump()); + + // basic blocks have just one branch so it is safe to add the hint at its end + if (haveHBR) { + MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(TBB); + } } else { // Conditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); MIB.addReg(Cond[1].getReg()).addMBB(TBB); + if (haveHBR) { + MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(TBB); + } + DEBUG(errs() << "Inserted one-way cond branch: "); DEBUG((*MIB).dump()); } return 1; } else { - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR)); // Two-way Conditional Branch. MIB.addReg(Cond[1].getReg()).addMBB(TBB); MIB2.addMBB(FBB); + if (haveHBR) { + MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(FBB); + } + DEBUG(errs() << "Inserted conditional branch: "); DEBUG((*MIB).dump()); DEBUG(errs() << "part 2: "); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 25f6fd0..e103c9b 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -28,6 +28,8 @@ let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in { def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt), "${:comment} ADJCALLSTACKUP", [(callseq_end timm:$amt)]>; + def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ), + "$targ:\t${:comment}branch hint target",[ ]>; } //===----------------------------------------------------------------------===// @@ -2013,9 +2015,9 @@ class SHLHInst<dag OOL, dag IOL, list<dag> pattern>: RotShiftVec, pattern>; class SHLHVecInst<ValueType vectype>: - SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [(set (vectype VECREG:$rT), - (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>; + (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; multiclass ShiftLeftHalfword { @@ -2063,9 +2065,9 @@ class SHLInst<dag OOL, dag IOL, list<dag> pattern>: multiclass ShiftLeftWord { def v4i32: - SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [(set (v4i32 VECREG:$rT), - (SPUvec_shl (v4i32 VECREG:$rA), R16C:$rB))]>; + (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def r32: SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>; @@ -2511,19 +2513,11 @@ class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>: RotShiftVec, pattern>; def ROTHMv8i16: - ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R32C:$rB), - (ROTHMv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R16C:$rB), - (ROTHMv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R8C:$rB), - (ROTHMv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), +
(ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; // ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left // Note: This instruction doesn't match a pattern because rB must be negated @@ -2584,19 +2578,11 @@ class ROTMInst<dag OOL, dag IOL, list<dag> pattern>: RotShiftVec, pattern>; def ROTMv4i32: - ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R32C:$rB), - (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R16C:$rB), - (ROTMv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R8C:$rB), - (ROTMv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>; def ROTMr32: ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), @@ -2802,20 +2788,12 @@ defm ROTQMBII: RotateMaskQuadByBitsImm; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ def ROTMAHv8i16: - RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "rotmah\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB), - (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R16C:$rB), - (ROTMAHv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB), - (ROTMAHv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; def ROTMAHr16: RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), @@ -2857,20 +2835,12 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)), (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; def ROTMAv4i32: - RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "rotma\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB), - (ROTMAv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R16C:$rB), - (ROTMAv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB), - (ROTMAv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>; def ROTMAr32: RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), @@ -4208,8 +4178,8 @@ def : Pat<(fabs (v4f32 VECREG:$rA)), //===----------------------------------------------------------------------===// // Hint for branch instructions: //===----------------------------------------------------------------------===// - -/* def HBR : SPUInstr<(outs), (ins), "hbr\t" */ +def HBRA : + HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">; //===----------------------------------------------------------------------===// // Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 641da04..1708c59 100644 --- 
a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -46,6 +46,14 @@ namespace llvm { virtual const TargetRegisterClass * getPointerRegClass(unsigned Kind = 0) const; + /// After allocating this many registers, the allocator should feel + /// register pressure. The value is a somewhat random guess, based on the + /// number of non callee saved registers in the C calling convention. + virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC, + MachineFunction &MF) const{ + return 50; + } + //! Return the array of callee-saved registers virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 71d6049..797cfd5 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1348,12 +1348,10 @@ void CppWriter::printInstruction(const Instruction *I, const PHINode* phi = cast<PHINode>(I); Out << "PHINode* " << iName << " = PHINode::Create(" - << getCppName(phi->getType()) << ", \""; + << getCppName(phi->getType()) << ", " + << phi->getNumIncomingValues() << ", \""; printEscapedString(phi->getName()); Out << "\", " << bbname << ");"; - nl(Out) << iName << "->reserveOperandSpace(" - << phi->getNumIncomingValues() - << ");"; nl(Out); for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { Out << iName << "->addIncoming(" diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index 3379ac2..060a87b 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -57,18 +57,26 @@ static unsigned mblazeBinary2Opcode[] = { }; static unsigned getRD(uint32_t insn) { + if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F)) + return UNSUPPORTED; return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F); } static unsigned getRA(uint32_t insn) { + if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F)) + return UNSUPPORTED; return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F); } static unsigned getRB(uint32_t insn) { + if (!MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F)) + return UNSUPPORTED; return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F); } static int64_t getRS(uint32_t insn) { + if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF)) + return UNSUPPORTED; return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF); } @@ -489,13 +497,14 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, raw_ostream &vStream) const { // The machine instruction. uint32_t insn; + uint64_t read; uint8_t bytes[4]; - // We always consume 4 bytes of data - size = 4; + // By default we consume 1 byte on failure + size = 1; // We want to read exactly 4 bytes of data. - if (region.readBytes(address, 4, (uint8_t*)bytes, NULL) == -1) + if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4) return false; // Encoded as a big-endian 32-bit word in the stream. 
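The hunks in this disassembler trade hard assertions for recoverable failures: each encoded register field is validated up front, operands are built only from validated values, and on any failure the function returns false with a one-byte size so the caller can resynchronize instead of aborting. A minimal self-contained sketch of that pattern (invented helper names, not the LLVM or MBlaze API):

    #include <cstddef>
    #include <cstdint>

    static const unsigned UNSUPPORTED = ~0u;   // sentinel for an invalid field

    // Hypothetical special-register decoder: only a handful of 14-bit
    // encodings are valid, so anything else is rejected outright rather
    // than mapped to a bogus register.
    static unsigned decodeSpecialReg(uint32_t insn) {
      unsigned field = insn & 0x3FFF;
      switch (field) {
      case 0x0000: case 0x0001: case 0x0003: case 0x0007:
        return field;
      default:
        return UNSUPPORTED;
      }
    }

    // Decode one instruction: returns true and sets size = 4 on success,
    // otherwise leaves size = 1 so a byte-oriented caller can slide
    // forward one byte and retry.
    static bool decodeInsn(const uint8_t *bytes, size_t avail, uint64_t &size) {
      size = 1;                                  // consumed on failure
      if (avail < 4)
        return false;                            // short read, not a full word
      uint32_t insn = (uint32_t(bytes[0]) << 24) | (uint32_t(bytes[1]) << 16) |
                      (uint32_t(bytes[2]) << 8)  |  uint32_t(bytes[3]);
      if (decodeSpecialReg(insn) == UNSUPPORTED)
        return false;                            // reject, do not assert
      size = 4;                                  // whole word consumed
      return true;
    }

The same shape runs through the hunk that follows: every unknown encoding becomes a return false instead of llvm_unreachable, which matters for a disassembler that may be fed arbitrary bytes.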
@@ -509,44 +518,63 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, instr.setOpcode(opcode); + unsigned RD = getRD(insn); + unsigned RA = getRA(insn); + unsigned RB = getRB(insn); + unsigned RS = getRS(insn); + uint64_t tsFlags = MBlazeInsts[opcode].TSFlags; switch ((tsFlags & MBlazeII::FormMask)) { - default: llvm_unreachable("unknown instruction encoding"); + default: + return false; case MBlazeII::FRRRR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRB(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RB)); + instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FRRR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRI: switch (opcode) { - default: llvm_unreachable("unknown instruction encoding"); + default: + return false; case MBlaze::MFS: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); break; case MBlaze::MTS: + if (RA == UNSUPPORTED) + return false; instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlaze::MSRSET: case MBlaze::MSRCLR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(insn&0x7FFF)); break; } break; case MBlazeII::FRRI: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); switch (opcode) { default: instr.addOperand(MCOperand::CreateImm(getIMM(insn))); @@ -560,27 +588,37 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, break; case MBlazeII::FCRR: - instr.addOperand(MCOperand::CreateReg(getRA(insn))); - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RA == UNSUPPORTED || RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RA)); + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FCRI: - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); break; case MBlazeII::FRCR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RD == UNSUPPORTED || RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRCI: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); break; case MBlazeII::FCCR: - 
instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FCCI: @@ -588,33 +626,45 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, break; case MBlazeII::FRRCI: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getSHT(insn))); break; case MBlazeII::FRRC: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FRCX: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getFSL(insn))); break; case MBlazeII::FRCS: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRS(insn))); + if (RD == UNSUPPORTED || RS == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RS)); break; case MBlazeII::FCRCS: - instr.addOperand(MCOperand::CreateReg(getRS(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RS == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RS)); + instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FCRCX: - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getFSL(insn))); break; @@ -623,16 +673,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, break; case MBlazeII::FCR: - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRIR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + instr.addOperand(MCOperand::CreateReg(RA)); break; } + // We always consume 4 bytes of data on success + size = 4; + return true; } diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index bebc6c8..13c4b49 100644 --- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -18,11 +18,12 @@ namespace llvm { class MCOperand; + class TargetMachine; class MBlazeInstPrinter : public MCInstPrinter { public: - MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) { - } + MBlazeInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td index 1fa1e4d..1245658 100644 --- a/lib/Target/MBlaze/MBlaze.td +++ b/lib/Target/MBlaze/MBlaze.td @@ -31,49 +31,28 @@ def MBlazeInstrInfo : InstrInfo; // Microblaze Subtarget features // //===----------------------------------------------------------------------===// -def 
FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true", - "Implements 3-stage pipeline">; def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true", "Implements barrel shifter">; def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true", "Implements hardware divider">; def FeatureMul : SubtargetFeature<"mul", "HasMul", "true", "Implements hardware multiplier">; -def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true", - "Implements FSL instructions">; -def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true", - "Implements extended FSL instructions">; -def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true", - "Implements MSR register set and clear">; -def FeatureException : SubtargetFeature<"exception", "HasException", "true", - "Implements hardware exception support">; def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true", "Implements pattern compare instruction">; def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true", "Implements floating point unit">; -def FeatureESR : SubtargetFeature<"esr", "HasESR", "true", - "Implements ESR and EAR registers">; -def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true", - "Implements processor version register">; def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true", "Implements multiplier with 64-bit result">; def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true", "Implements sqrt and floating point convert">; -def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true", - "Implements memory management unit">; //===----------------------------------------------------------------------===// // MBlaze processors supported. //===----------------------------------------------------------------------===// -class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, MBlazeGenericItineraries, Features>; - -def : Proc<"v400", []>; -def : Proc<"v500", []>; -def : Proc<"v600", []>; -def : Proc<"v700", []>; -def : Proc<"v710", []>; +def : Processor<"mblaze", MBlazeGenericItineraries, []>; +def : Processor<"mblaze3", MBlazePipe3Itineraries, []>; +def : Processor<"mblaze5", MBlazePipe5Itineraries, []>; //===----------------------------------------------------------------------===// // Instruction Descriptions diff --git a/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MBlazeAsmBackend.cpp index a4b21af..08f14c3 100644 --- a/lib/Target/MBlaze/MBlazeAsmBackend.cpp +++ b/lib/Target/MBlaze/MBlazeAsmBackend.cpp @@ -150,14 +150,13 @@ void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) assert(0 && "Mac not supported on MBlaze"); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: + + if (TheTriple.isOSWindows()) assert(0 && "Windows not supported on MBlaze"); - default: - return new ELFMBlazeAsmBackend(T, Triple(TT).getOS()); - } + + return new ELFMBlazeAsmBackend(T, TheTriple.getOS()); } diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 0016df5..0f0f60e 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -319,10 +319,11 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { } static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 
0) - return new MBlazeInstPrinter(MAI); + return new MBlazeInstPrinter(TM, MAI); return 0; } diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index 4399ee2..973e968 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -77,7 +77,7 @@ static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) { // We must assume that unknown immediate values require more than // 16-bits to represent. - if (mop.isGlobal() || mop.isSymbol()) + if (mop.isGlobal() || mop.isSymbol() || mop.isJTI() || mop.isCPI()) return true; // FIXME: we could probably check to see if the FP value happens diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index f39826b..21a5988 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -274,7 +274,7 @@ MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI, F->insert(It, loop); F->insert(It, finish); - // Update machine-CFG edges by transfering adding all successors and + // Update machine-CFG edges by transferring all successors and // remaining instructions from the current block to the new block which // will contain the Phi node for the select. finish->splice(finish->begin(), MBB, @@ -456,7 +456,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI, F->insert(It, start); F->insert(It, exit); - // Update machine-CFG edges by transfering adding all successors and + // Update machine-CFG edges by transferring all successors and // remaining instructions from the current block to the new block which // will contain the Phi node for the select. exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)), @@ -778,7 +778,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag is necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { @@ -1103,7 +1103,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight( switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break;
+ break; case 'd': case 'y': if (type->isIntegerTy()) diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td index 094de5c..4acdcfd 100644 --- a/lib/Target/MBlaze/MBlazeInstrFPU.td +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -21,22 +21,22 @@ class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> : TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IILoad>; + [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IIC_MEMl>; class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> : TB<op, (outs GPR:$dst), (ins memri:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>; + [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>; class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> : TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIStore>; + [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIC_MEMs>; class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> : TB<op, (outs), (ins GPR:$dst, memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIStore>; + [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIC_MEMs>; class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, InstrItinClass itin> : @@ -56,15 +56,10 @@ class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, !strconcat(instr_asm, " $dst, $c, $b"), [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>; -class LogicF<bits<6> op, string instr_asm> : - TB<op, (outs GPR:$dst), (ins GPR:$b, GPR:$c), - !strconcat(instr_asm, " $dst, $b, $c"), - [], IIAlu>; - class LogicFI<bits<6> op, string instr_asm> : TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [], IIAlu>; + [], IIC_ALU>; let rb=0 in { class ArithF2<bits<6> op, bits<11> flags, string instr_asm, @@ -95,10 +90,10 @@ let rb=0 in { //===----------------------------------------------------------------------===// let Predicates=[HasFPU] in { def FORI : LogicFI<0x28, "ori ">; - def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>; - def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>; - def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>; - def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>; + def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIC_FPU>; + def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIC_FPU>; + def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIC_FPU>; + def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIC_FPUd>; } let Predicates=[HasFPU], isCodeGenOnly=1 in { @@ -110,19 +105,19 @@ let Predicates=[HasFPU], isCodeGenOnly=1 in { } let Predicates=[HasFPU,HasSqrt] in { - def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>; - def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>; - def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIAlu>; + def FLT : ArithIF<0x16, 0x280, "flt ", IIC_FPUf>; + def FINT : ArithFI<0x16, 0x300, "fint ", IIC_FPUi>; + def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIC_FPUs>; } let isAsCheapAsAMove = 1 in { - def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>; - def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>; - def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>; - def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>; - def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>; - def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>; - def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>; + def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", 
IIC_FPUc>; + def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIC_FPUc>; + def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIC_FPUc>; + def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIC_FPUc>; + def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIC_FPUc>; + def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIC_FPUc>; + def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIC_FPUc>; } diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td index 3209845..3082a7e 100644 --- a/lib/Target/MBlaze/MBlazeInstrFSL.td +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -13,7 +13,7 @@ class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b), !strconcat(instr_asm, " $dst, $b"), - [(set GPR:$dst, (OpNode immZExt4:$b))],IIAlu> + [(set GPR:$dst, (OpNode immZExt4:$b))],IIC_FSLg> { bits<5> rd; bits<4> fslno; @@ -29,7 +29,7 @@ class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> : class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b), !strconcat(instr_asm, " $dst, $b"), - [(set GPR:$dst, (OpNode GPR:$b))], IIAlu> + [(set GPR:$dst, (OpNode GPR:$b))], IIC_FSLg> { bits<5> rd; bits<5> rb; @@ -45,7 +45,7 @@ class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> : class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b), !strconcat(instr_asm, " $v, $b"), - [(OpNode GPR:$v, immZExt4:$b)], IIAlu> + [(OpNode GPR:$v, immZExt4:$b)], IIC_FSLp> { bits<5> ra; bits<4> fslno; @@ -61,7 +61,7 @@ class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b), !strconcat(instr_asm, " $v, $b"), - [(OpNode GPR:$v, GPR:$b)], IIAlu> + [(OpNode GPR:$v, GPR:$b)], IIC_FSLp> { bits<5> ra; bits<5> rb; @@ -77,7 +77,7 @@ class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst<op, FCX, (outs), (ins fslimm:$b), !strconcat(instr_asm, " $b"), - [(OpNode immZExt4:$b)], IIAlu> + [(OpNode immZExt4:$b)], IIC_FSLp> { bits<4> fslno; @@ -92,7 +92,7 @@ class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst<op, FCR, (outs), (ins GPR:$b), !strconcat(instr_asm, " $b"), - [(OpNode GPR:$b)], IIAlu> + [(OpNode GPR:$b)], IIC_FSLp> { bits<5> rb; diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td index d62574d..54f605f 100644 --- a/lib/Target/MBlaze/MBlazeInstrFormats.td +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -81,7 +81,7 @@ class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr, // Pseudo instruction class //===----------------------------------------------------------------------===// class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: - MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIPseudo>; + MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIC_Pseudo>; //===----------------------------------------------------------------------===// // Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|> diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index b353dcd..794ebed 100644 --- 
a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "MBlazeGenInstrInfo.inc" diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index b7300c1..b717da8 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -261,7 +261,6 @@ public: virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 7b8f70a..896e8ea 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -47,22 +47,22 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd, //===----------------------------------------------------------------------===// // MBlaze Instruction Predicate Definitions. //===----------------------------------------------------------------------===// -def HasPipe3 : Predicate<"Subtarget.hasPipe3()">; +// def HasPipe3 : Predicate<"Subtarget.hasPipe3()">; def HasBarrel : Predicate<"Subtarget.hasBarrel()">; -def NoBarrel : Predicate<"!Subtarget.hasBarrel()">; +// def NoBarrel : Predicate<"!Subtarget.hasBarrel()">; def HasDiv : Predicate<"Subtarget.hasDiv()">; def HasMul : Predicate<"Subtarget.hasMul()">; -def HasFSL : Predicate<"Subtarget.hasFSL()">; -def HasEFSL : Predicate<"Subtarget.hasEFSL()">; -def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">; -def HasException : Predicate<"Subtarget.hasException()">; +// def HasFSL : Predicate<"Subtarget.hasFSL()">; +// def HasEFSL : Predicate<"Subtarget.hasEFSL()">; +// def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">; +// def HasException : Predicate<"Subtarget.hasException()">; def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">; def HasFPU : Predicate<"Subtarget.hasFPU()">; -def HasESR : Predicate<"Subtarget.hasESR()">; -def HasPVR : Predicate<"Subtarget.hasPVR()">; +// def HasESR : Predicate<"Subtarget.hasESR()">; +// def HasPVR : Predicate<"Subtarget.hasPVR()">; def HasMul64 : Predicate<"Subtarget.hasMul64()">; def HasSqrt : Predicate<"Subtarget.hasSqrt()">; -def HasMMU : Predicate<"Subtarget.hasMMU()">; +// def HasMMU : Predicate<"Subtarget.hasMMU()">; //===----------------------------------------------------------------------===// // MBlaze Operand, Complex Patterns and Transformations Definitions. 
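A TableGen Predicate is just a C++ expression evaluated against the subtarget when a pattern is considered for selection, so the predicates above have to be retired together with the SubtargetFeature definitions removed from MBlaze.td earlier in this patch. A rough sketch of the accessor side those predicate strings compile against (invented names, not the actual MBlazeSubtarget interface):

    #include <string>

    // Each optional hardware unit is a boolean derived from the feature
    // string; Predicate<"Subtarget.hasBarrel()"> and friends simply call
    // the matching accessor.
    class ToySubtarget {
      bool HasBarrel = false;   // "barrel": barrel shifter
      bool HasDiv = false;      // "div": hardware divider
      bool HasMul = false;      // "mul": hardware multiplier
    public:
      void enableFeature(const std::string &name) {
        if (name == "barrel")
          HasBarrel = true;
        else if (name == "div")
          HasDiv = true;
        else if (name == "mul")
          HasMul = true;
      }
      bool hasBarrel() const { return HasBarrel; }
      bool hasDiv() const { return HasDiv; }
      bool hasMul() const { return HasMul; }
    };

Dropping a feature without retiring its predicate would leave patterns guarded by an accessor that no longer exists, which is why the two sides of this cleanup travel in the same commit.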
@@ -170,18 +170,18 @@ class ArithI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>; + [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_ALU>; class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [], IIAlu>; + [], IIC_ALU>; class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>; + [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_SHT>; class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, InstrItinClass itin> : @@ -193,7 +193,7 @@ class ArithRI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c), !strconcat(instr_asm, " $dst, $c, $b"), - [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIAlu>; + [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIC_ALU>; class ArithN<bits<6> op, bits<11> flags, string instr_asm, InstrItinClass itin> : @@ -204,7 +204,7 @@ class ArithN<bits<6> op, bits<11> flags, string instr_asm, class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [], IIAlu>; + [], IIC_ALU>; class ArithRN<bits<6> op, bits<11> flags, string instr_asm, InstrItinClass itin> : @@ -215,7 +215,7 @@ class ArithRN<bits<6> op, bits<11> flags, string instr_asm, class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b), !strconcat(instr_asm, " $dst, $b, $c"), - [], IIAlu>; + [], IIC_ALU>; //===----------------------------------------------------------------------===// // Misc Arithmetic Instructions @@ -224,23 +224,23 @@ class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> : TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIAlu>; + [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIC_ALU>; class LogicI<bits<6> op, string instr_asm, SDNode OpNode> : TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c), !strconcat(instr_asm, " $dst, $b, $c"), [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))], - IIAlu>; + IIC_ALU>; class LogicI32<bits<6> op, string instr_asm> : TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [], IIAlu>; + [], IIC_ALU>; class PatCmp<bits<6> op, bits<11> flags, string instr_asm> : TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [], IIAlu>; + [], IIC_ALU>; //===----------------------------------------------------------------------===// // Memory Access Instructions @@ -248,22 +248,22 @@ class PatCmp<bits<6> op, bits<11> flags, string instr_asm> : class LoadM<bits<6> op, bits<11> flags, string instr_asm> : TA<op, flags, (outs GPR:$dst), (ins memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [], IILoad>; + [], IIC_MEMl>; class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> : TB<op, (outs GPR:$dst), (ins memri:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(set (i32 GPR:$dst), (OpNode 
iaddr:$addr))], IILoad>; + [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>; class StoreM<bits<6> op, bits<11> flags, string instr_asm> : TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [], IIStore>; + [], IIC_MEMs>; class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> : TB<op, (outs), (ins GPR:$dst, memri:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIStore>; + [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIC_MEMs>; //===----------------------------------------------------------------------===// // Branch Instructions @@ -271,7 +271,7 @@ class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> : class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : TA<op, flags, (outs), (ins GPR:$target), !strconcat(instr_asm, " $target"), - [], IIBranch> { + [], IIC_BR> { let rd = 0x0; let ra = br; let Form = FCCR; @@ -280,7 +280,7 @@ class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : class BranchI<bits<6> op, bits<5> br, string instr_asm> : TB<op, (outs), (ins brtarget:$target), !strconcat(instr_asm, " $target"), - [], IIBranch> { + [], IIC_BR> { let rd = 0; let ra = br; let Form = FCCI; @@ -292,7 +292,7 @@ class BranchI<bits<6> op, bits<5> br, string instr_asm> : class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops), !strconcat(instr_asm, " $link, $target"), - [], IIBranch> { + [], IIC_BRl> { let ra = br; let Form = FRCR; } @@ -300,7 +300,7 @@ class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : class BranchLI<bits<6> op, bits<5> br, string instr_asm> : TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops), !strconcat(instr_asm, " $link, $target"), - [], IIBranch> { + [], IIC_BRl> { let ra = br; let Form = FRCI; } @@ -312,7 +312,7 @@ class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : TA<op, flags, (outs), (ins GPR:$a, GPR:$b), !strconcat(instr_asm, " $a, $b"), - [], IIBranch> { + [], IIC_BRc> { let rd = br; let Form = FCRR; } @@ -320,7 +320,7 @@ class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : class BranchCI<bits<6> op, bits<5> br, string instr_asm> : TB<op, (outs), (ins GPR:$a, brtarget:$offset), !strconcat(instr_asm, " $a, $offset"), - [], IIBranch> { + [], IIC_BRc> { let rd = br; let Form = FCRI; } @@ -330,71 +330,74 @@ class BranchCI<bits<6> op, bits<5> br, string instr_asm> : //===----------------------------------------------------------------------===// let isCommutable = 1, isAsCheapAsAMove = 1 in { - def ADDK : Arith<0x04, 0x000, "addk ", add, IIAlu>; + def ADDK : Arith<0x04, 0x000, "addk ", add, IIC_ALU>; def AND : Logic<0x21, 0x000, "and ", and>; def OR : Logic<0x20, 0x000, "or ", or>; def XOR : Logic<0x22, 0x000, "xor ", xor>; - def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">; - def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">; - def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">; + + let Predicates=[HasPatCmp] in { + def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">; + def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">; + def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">; + } let Defs = [CARRY] in { - def ADD : Arith<0x00, 0x000, "add ", addc, IIAlu>; + def ADD : Arith<0x00, 0x000, "add ", addc, IIC_ALU>; let Uses = [CARRY] in { - def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>; + def ADDC : Arith<0x02, 0x000, "addc ", adde, IIC_ALU>; } } let Uses = [CARRY] in { - def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>; + def 
ADDKC : ArithN<0x06, 0x000, "addkc ", IIC_ALU>; } } let isAsCheapAsAMove = 1 in { - def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>; - def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>; - def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>; - def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIAlu>; + def ANDN : ArithN<0x23, 0x000, "andn ", IIC_ALU>; + def CMP : ArithN<0x05, 0x001, "cmp ", IIC_ALU>; + def CMPU : ArithN<0x05, 0x003, "cmpu ", IIC_ALU>; + def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIC_ALU>; let Defs = [CARRY] in { - def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIAlu>; + def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIC_ALU>; let Uses = [CARRY] in { - def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>; + def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIC_ALU>; } } let Uses = [CARRY] in { - def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>; + def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIC_ALU>; } } let isCommutable = 1, Predicates=[HasMul] in { - def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>; + def MUL : Arith<0x10, 0x000, "mul ", mul, IIC_ALUm>; } let isCommutable = 1, Predicates=[HasMul,HasMul64] in { - def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>; - def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>; + def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIC_ALUm>; + def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIC_ALUm>; } let Predicates=[HasMul,HasMul64] in { - def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>; + def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIC_ALUm>; } let Predicates=[HasBarrel] in { - def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>; - def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>; - def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>; + def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIC_SHT>; + def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIC_SHT>; + def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIC_SHT>; def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>; def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>; def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>; } let Predicates=[HasDiv] in { - def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIAlu>; - def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIAlu>; + def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIC_ALUd>; + def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIC_ALUd>; } //===----------------------------------------------------------------------===// @@ -552,7 +555,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), "rtsd $target, $imm", [], - IIBranch>; + IIC_BR>; } let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, @@ -560,7 +563,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), "rtid $target, $imm", [], - IIBranch>; + IIC_BR>; } let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, @@ -568,7 +571,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), "rtbd $target, $imm", [], - IIBranch>; + IIC_BR>; } let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, @@ -576,7 +579,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), "rted $target, $imm", [], - IIBranch>; + IIC_BR>; } //===----------------------------------------------------------------------===// @@ -584,7 +587,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, 
//===----------------------------------------------------------------------===// let neverHasSideEffects = 1 in { - def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIAlu>; + def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>; } let usesCustomInserter = 1 in { @@ -611,17 +614,17 @@ let usesCustomInserter = 1 in { let rb = 0 in { def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src), - "sext16 $dst, $src", [], IIAlu>; + "sext16 $dst, $src", [], IIC_ALU>; def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src), - "sext8 $dst, $src", [], IIAlu>; + "sext8 $dst, $src", [], IIC_ALU>; let Defs = [CARRY] in { def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src), - "srl $dst, $src", [], IIAlu>; + "srl $dst, $src", [], IIC_ALU>; def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src), - "sra $dst, $src", [], IIAlu>; + "sra $dst, $src", [], IIC_ALU>; let Uses = [CARRY] in { def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src), - "src $dst, $src", [], IIAlu>; + "src $dst, $src", [], IIC_ALU>; } } } @@ -637,36 +640,36 @@ let isCodeGenOnly=1 in { //===----------------------------------------------------------------------===// let Form=FRCS in { def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src), - "mfs $dst, $src", [], IIAlu>; + "mfs $dst, $src", [], IIC_ALU>; } let Form=FCRCS in { def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src), - "mts $dst, $src", [], IIAlu>; + "mts $dst, $src", [], IIC_ALU>; } def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set), - "msrset $dst, $set", [], IIAlu>; + "msrset $dst, $set", [], IIC_ALU>; def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr), - "msrclr $dst, $clr", [], IIAlu>; + "msrclr $dst, $clr", [], IIC_ALU>; let rd=0x0, Form=FCRR in { def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b), - "wdc $a, $b", [], IIAlu>; + "wdc $a, $b", [], IIC_WDC>; def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b), - "wdc.flush $a, $b", [], IIAlu>; + "wdc.flush $a, $b", [], IIC_WDC>; def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b), - "wdc.clear $a, $b", [], IIAlu>; + "wdc.clear $a, $b", [], IIC_WDC>; def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b), - "wic $a, $b", [], IIAlu>; + "wic $a, $b", [], IIC_WDC>; } def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">; def BRKI : BranchLI<0x2E, 0x0C, "brki ">; def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm), - "imm $imm", [], IIAlu>; + "imm $imm", [], IIC_ALU>; //===----------------------------------------------------------------------===// // Pseudo instructions for atomic operations @@ -848,11 +851,6 @@ def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>; // Misc instructions def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>; -// Arithmetic with immediates -def : Pat<(add (i32 GPR:$in), imm:$imm),(ADDIK GPR:$in, imm:$imm)>; -def : Pat<(or (i32 GPR:$in), imm:$imm),(ORI GPR:$in, imm:$imm)>; -def : Pat<(xor (i32 GPR:$in), imm:$imm),(XORI GPR:$in, imm:$imm)>; - // Convert any extend loads into zero extend loads def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>; def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index fa9140d..ed8511d 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -181,6 +181,26 @@ unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) { return 0; // Not reached } +bool 
MBlazeRegisterInfo::isRegister(unsigned Reg) { + return Reg <= 31; +} + +bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) { + switch (Reg) { + case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 : + case 0x0007 : case 0x000B : case 0x000D : case 0x1000 : + case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 : + case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 : + case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 : + case 0x2008 : case 0x2009 : case 0x200A : case 0x200B : + return true; + + default: + return false; + } + return false; // Not reached +} + unsigned MBlazeRegisterInfo::getPICCallReg() { return MBlaze::R20; } diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 839536d..69ec5aa 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -45,6 +45,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { static unsigned getRegisterNumbering(unsigned RegEnum); static unsigned getRegisterFromNumbering(unsigned RegEnum); static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum); + static bool isRegister(unsigned RegEnum); + static bool isSpecialRegister(unsigned RegEnum); /// Get PIC indirect call register static unsigned getPICCallReg(); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index fbefb22..1a695a7 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -85,18 +85,19 @@ let Namespace = "MBlaze" in { def RTLBX : MBlazeSPRReg<0x1002, "rtlbx">, DwarfRegNum<[41]>; def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>; def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>; - def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[44]>; - def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[45]>; - def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[46]>; - def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[47]>; - def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[48]>; - def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[49]>; - def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[50]>; - def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[51]>; - def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[52]>; - def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[53]>; - def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[54]>; - def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[55]>; + def RTLBSX : MBlazeSPRReg<0x1004, "rtlbsx">, DwarfRegNum<[44]>; + def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[45]>; + def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[46]>; + def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[47]>; + def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[48]>; + def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[49]>; + def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[50]>; + def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[51]>; + def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[52]>; + def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[53]>; + def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[54]>; + def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[55]>; + def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[56]>; // The carry bit. 
In the Microblaze this is really bit 29 of the // MSR register but this is the only bit of that register that we diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index ac4d98c..4662f25 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -8,57 +8,48 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across MBlaze chips sets. Based on GCC/MBlaze backend files. +// MBlaze functional units. //===----------------------------------------------------------------------===// -def ALU : FuncUnit; -def IMULDIV : FuncUnit; +def IF : FuncUnit; +def ID : FuncUnit; +def EX : FuncUnit; +def MA : FuncUnit; +def WB : FuncUnit; //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for MBlaze //===----------------------------------------------------------------------===// -def IIAlu : InstrItinClass; -def IILoad : InstrItinClass; -def IIStore : InstrItinClass; -def IIXfer : InstrItinClass; -def IIBranch : InstrItinClass; -def IIHiLo : InstrItinClass; -def IIImul : InstrItinClass; -def IIIdiv : InstrItinClass; -def IIFcvt : InstrItinClass; -def IIFmove : InstrItinClass; -def IIFcmp : InstrItinClass; -def IIFadd : InstrItinClass; -def IIFmulSingle : InstrItinClass; -def IIFmulDouble : InstrItinClass; -def IIFdivSingle : InstrItinClass; -def IIFdivDouble : InstrItinClass; -def IIFsqrtSingle : InstrItinClass; -def IIFsqrtDouble : InstrItinClass; -def IIFrecipFsqrtStep : InstrItinClass; -def IIPseudo : InstrItinClass; +def IIC_ALU : InstrItinClass; +def IIC_ALUm : InstrItinClass; +def IIC_ALUd : InstrItinClass; +def IIC_SHT : InstrItinClass; +def IIC_FSLg : InstrItinClass; +def IIC_FSLp : InstrItinClass; +def IIC_MEMs : InstrItinClass; +def IIC_MEMl : InstrItinClass; +def IIC_FPU : InstrItinClass; +def IIC_FPUd : InstrItinClass; +def IIC_FPUf : InstrItinClass; +def IIC_FPUi : InstrItinClass; +def IIC_FPUs : InstrItinClass; +def IIC_FPUc : InstrItinClass; +def IIC_BR : InstrItinClass; +def IIC_BRc : InstrItinClass; +def IIC_BRl : InstrItinClass; +def IIC_WDC : InstrItinClass; +def IIC_Pseudo : InstrItinClass; //===----------------------------------------------------------------------===// -// MBlaze Generic instruction itineraries. +// MBlaze generic instruction itineraries. 
//===----------------------------------------------------------------------===// -def MBlazeGenericItineraries : ProcessorItineraries< - [ALU, IMULDIV], [], [ - InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, - InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, - InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, - InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>, - InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>, - InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>, - InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>, - InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>, - InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>, - InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>, - InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>, - InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>, - InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>, - InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>, - InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>, - InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>, - InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>, - InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>, - InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]> -]>; +def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>; + +//===----------------------------------------------------------------------===// +// MBlaze instruction itineraries for three stage pipeline. +//===----------------------------------------------------------------------===// +include "MBlazeSchedule3.td" + +//===----------------------------------------------------------------------===// +// MBlaze instruction itineraries for five stage pipeline. +//===----------------------------------------------------------------------===// +include "MBlazeSchedule5.td" diff --git a/lib/Target/MBlaze/MBlazeSchedule3.td b/lib/Target/MBlaze/MBlazeSchedule3.td new file mode 100644 index 0000000..ccbf99d --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSchedule3.td @@ -0,0 +1,236 @@ +//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze instruction itineraries for the three stage pipeline. +//===----------------------------------------------------------------------===// +def MBlazePipe3Itineraries : ProcessorItineraries< + [IF,ID,EX], [], [ + + // ALU instruction with one destination register and either two register + // source operands or one register source operand and one immediate operand. + // The instruction takes one cycle to execute in each of the stages. The + // two source operands are read during the decode stage and the result is + // ready after the execute stage. + InstrItinData< IIC_ALU, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]>], // one cycle in execute stage + [ 2 // result ready after two cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // ALU multiply instruction with one destination register and either two + // register source operands or one register source operand and one immediate + // operand. 
The instruction takes one cycle to execute in each of the + // pipeline stages except the execute stage, which takes three cycles. The + // two source operands are read during the decode stage and the result is + // ready after the execute stage. + InstrItinData< IIC_ALUm, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<3,[EX]>], // three cycles in execute stage + [ 4 // result ready after four cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // ALU divide instruction with one destination register and two register + // source operands. The instruction takes one cycle to execute in each of the + // pipeline stages except the execute stage, which takes 34 cycles. The two + // source operands are read during the decode stage and the result is ready + // after the execute stage. + InstrItinData< IIC_ALUd, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<34,[EX]>], // 34 cycles in execute stage + [ 35 // result ready after 35 cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Shift instruction with one destination register and either two register + // source operands or one register source operand and one immediate operand. + // The instruction takes one cycle to execute in each of the pipeline stages + // except the execute stage, which takes two cycles. The two source operands + // are read during the decode stage and the result is ready after the execute + // stage. + InstrItinData< IIC_SHT, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 3 // result ready after three cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Branch instruction with one source operand register. The instruction takes + // one cycle to execute in each of the pipeline stages. The source operand is + // read during the decode stage. + InstrItinData< IIC_BR, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]>], // one cycle in execute stage + [ 1 ]>, // first operand read after one cycle + + // Conditional branch instruction with two source operand registers. The + // instruction takes one cycle to execute in each of the pipeline stages. The + // two source operands are read during the decode stage. + InstrItinData< IIC_BRc, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]>], // one cycle in execute stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Branch and link instruction with one destination register and one source + // operand register. The instruction takes one cycle to execute in each of + // the pipeline stages. The source operand is read during the decode stage + // and the destination register is ready after the execute stage. + InstrItinData< IIC_BRl, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]>], // one cycle in execute stage + [ 2 // result ready after two cycles + , 1 ]>, // first operand read after one cycle + + // Cache control instruction with two source operand registers.
The + // instruction takes one cycle to execute in each of the pipeline stages + // except the execute stage, which takes two cycles. The source + // operands are read during the decode stage. + InstrItinData< IIC_WDC, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Floating point instruction with one destination register and two source + // operand registers. The instruction takes one cycle to execute in each of + // the pipeline stages except the execute stage, which takes six cycles. The + // source operands are read during the decode stage and the results are ready + // after the execute stage. + InstrItinData< IIC_FPU, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<6,[EX]>], // six cycles in execute stage + [ 7 // result ready after seven cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Floating point divide instruction with one destination register and two + // source operand registers. The instruction takes one cycle to execute in + // each of the pipeline stages except the execute stage, which takes 30 + // cycles. The source operands are read during the decode stage and the + // results are ready after the execute stage. + InstrItinData< IIC_FPUd, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<30,[EX]>], // 30 cycles in execute stage + [ 31 // result ready after 31 cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Convert floating point to integer instruction with one destination + // register and one source operand register. The instruction takes one cycle + // to execute in each of the pipeline stages except the execute stage, + // which takes seven cycles. The source operands are read during the decode + // stage and the results are ready after the execute stage. + InstrItinData< IIC_FPUi, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<7,[EX]>], // seven cycles in execute stage + [ 8 // result ready after eight cycles + , 1 ]>, // first operand read after one cycle + + // Convert integer to floating point instruction with one destination + // register and one source operand register. The instruction takes one cycle + // to execute in each of the pipeline stages except the execute stage, + // which takes six cycles. The source operands are read during the decode + // stage and the results are ready after the execute stage. + InstrItinData< IIC_FPUf, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<6,[EX]>], // six cycles in execute stage + [ 7 // result ready after seven cycles + , 1 ]>, // first operand read after one cycle + + // Floating point square root instruction with one destination register and + // one source operand register. The instruction takes one cycle to execute in + // each of the pipeline stages except the execute stage, which takes 29 + // cycles. The source operands are read during the decode stage and the + // results are ready after the execute stage.
+ InstrItinData< IIC_FPUs, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<29,[EX]>], // 29 cycles in execute stage + [ 30 // result ready after 30 cycles + , 1 ]>, // first operand read after one cycle + + // Floating point comparison instruction with one destination register and + // two source operand registers. The instruction takes one cycle to execute + // in each of the pipeline stages except the execute stage, which takes three + // cycles. The source operands are read during the decode stage and the + // results are ready after the execute stage. + InstrItinData< IIC_FPUc, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<3,[EX]>], // three cycles in execute stage + [ 4 // result ready after four cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // FSL get instruction with one register or immediate source operand and one + // destination register. The instruction takes one cycle to execute in each + // of the pipeline stages except the execute stage, which takes two cycles. + // The one source operand is read during the decode stage and the result is + // ready after the execute stage. + InstrItinData< IIC_FSLg, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 3 // result ready after three cycles + , 1 ]>, // first operand read after one cycle + + // FSL put instruction with either two register source operands or one + // register source operand and one immediate operand. There is no result + // produced by the instruction. The instruction takes one cycle to execute in + // each of the pipeline stages except the execute stage, which takes two + // cycles. The two source operands are read during the decode stage. + InstrItinData< IIC_FSLp, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Memory store instruction with either three register source operands or two + // register source operands and one immediate operand. There is no result + // produced by the instruction. The instruction takes one cycle to execute in + // each of the pipeline stages except the execute stage, which takes two + // cycles. All of the source operands are read during the decode stage. + InstrItinData< IIC_MEMs, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 1 // first operand read after one cycle + , 1 // second operand read after one cycle + , 1 ]>, // third operand read after one cycle + + // Memory load instruction with one destination register and either two + // register source operands or one register source operand and one immediate + // operand. The instruction takes one cycle to execute in each of the + // pipeline stages except the execute stage, which takes two cycles. All of + // the source operands are read during the decode stage and the result is + // ready after the execute stage.
+ InstrItinData< IIC_MEMl, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 3 // result ready after three cycles + , 1 // first operand read after one cycle + , 1 ]> // second operand read after one cycle +]>; diff --git a/lib/Target/MBlaze/MBlazeSchedule5.td b/lib/Target/MBlaze/MBlazeSchedule5.td new file mode 100644 index 0000000..fa88766 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSchedule5.td @@ -0,0 +1,267 @@ +//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze instruction itineraries for the five stage pipeline. +//===----------------------------------------------------------------------===// +def MBlazePipe5Itineraries : ProcessorItineraries< + [IF,ID,EX,MA,WB], [], [ + + // ALU instruction with one destination register and either two register + // source operands or one register source operand and one immediate operand. + // The instruction takes one cycle to execute in each of the stages. The + // two source operands are read during the decode stage and the result is + // ready after the execute stage. + InstrItinData< IIC_ALU, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 2 // result ready after two cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // ALU multiply instruction with one destination register and either two + // register source operands or one register source operand and one immediate + // operand. The instruction takes one cycle to execute in each of the + // pipeline stages. The two source operands are read during the decode stage + // and the result is ready after the execute stage. + InstrItinData< IIC_ALUm, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 2 // result ready after two cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // ALU divide instruction with one destination register and two register + // source operands. The instruction takes one cycle to execute in each of the + // pipeline stages except the memory access stage, which takes 31 cycles. The two + // source operands are read during the decode stage and the result is ready + // after the memory access stage.
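+ + // Editor's aside, not part of the original patch: a hedged sketch of the + // convention these entries appear to follow. The InstrStage list gives the + // cycles an instruction spends in each functional unit, and the trailing + // integer list gives first the cycle at which the result is available and + // then the cycle at which each source operand is read. A hypothetical + // IIC_Example class spending two cycles in EX would read: + // InstrItinData< IIC_Example, + // [ InstrStage<1,[IF]>, InstrStage<1,[ID]>, InstrStage<2,[EX]> ], + // [ 3 // result ready after three cycles + // , 1 // first operand read after one cycle + // , 1 ]> // second operand read after one cycle + // The divide entry below follows the same pattern stretched over the + // five-stage pipeline, with a long stay in the memory access stage.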
+ InstrItinData< IIC_ALUd, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<31,[MA]> // 31 cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 33 // result ready after 33 cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Shift instruction with one destination register and either two register + // source operands or one register source operand and one immediate operand. + // The instruction takes one cycle to execute in each of the pipeline stages. + // The two source operands are read during the decode stage and the result is + // ready after the memory access stage. + InstrItinData< IIC_SHT, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 3 // result ready after three cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Branch instruction with one source operand register. The instruction takes + // one cycle to execute in each of the pipeline stages. The source operand is + // read during the decode stage. + InstrItinData< IIC_BR, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 ]>, // first operand read after one cycle + + // Conditional branch instruction with two source operand registers. The + // instruction takes one cycle to execute in each of the pipeline stages. The + // two source operands are read during the decode stage. + InstrItinData< IIC_BRc, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Branch and link instruction with one destination register and one source + // operand register. The instruction takes one cycle to execute in each of + // the pipeline stages. The source operand is read during the decode stage + // and the destination register is ready after the writeback stage. + InstrItinData< IIC_BRl, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 4 // result ready after four cycles + , 1 ]>, // first operand read after one cycle + + // Cache control instruction with two source operand registers. The + // instruction takes one cycle to execute in each of the pipeline stages + // except the memory access stage, which takes two cycles. The source + // operands are read during the decode stage. 
+ InstrItinData< IIC_WDC, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<2,[MA]> // two cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Floating point instruction with one destination register and two source + // operand registers. The instruction takes one cycle to execute in each of + // the pipeline stages except the memory access stage, which takes two + // cycles. The source operands are read during the decode stage and the + // results are ready after the writeback stage. + InstrItinData< IIC_FPU, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<2,[MA]> // two cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 5 // result ready after five cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Floating point divide instruction with one destination register and two + // source operand registers. The instruction takes one cycle to execute in + // each of the pipeline stages except the memory access stage, which takes 26 + // cycles. The source operands are read during the decode stage and the + // results are ready after the writeback stage. + InstrItinData< IIC_FPUd, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<26,[MA]> // 26 cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 29 // result ready after 29 cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Convert floating point to integer instruction with one destination + // register and one source operand register. The instruction takes one cycle + // to execute in each of the pipeline stages except the memory access stage, + // which takes three cycles. The source operands are read during the decode + // stage and the results are ready after the writeback stage. + InstrItinData< IIC_FPUi, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<3,[MA]> // three cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 6 // result ready after six cycles + , 1 ]>, // first operand read after one cycle + + // Convert integer to floating point instruction with one destination + // register and one source operand register. The instruction takes one cycle + // to execute in each of the pipeline stages except the memory access stage, + // which takes two cycles. The source operands are read during the decode + // stage and the results are ready after the writeback stage. 
+ InstrItinData< IIC_FPUf, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<2,[MA]> // two cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 5 // result ready after five cycles + , 1 ]>, // first operand read after one cycle + + // Floating point square root instruction with one destination register and + // one source operand register. The instruction takes one cycle to execute in + // each of the pipeline stages except the memory access stage, which takes 25 + // cycles. The source operands are read during the decode stage and the + // results are ready after the writeback stage. + InstrItinData< IIC_FPUs, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<25,[MA]> // 25 cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 28 // result ready after 28 cycles + , 1 ]>, // first operand read after one cycle + + // Floating point comparison instruction with one destination register and + // two source operand registers. The instruction takes one cycle to execute + // in each of the pipeline stages. The source operands are read during the + // decode stage and the results are ready after the execute stage. + InstrItinData< IIC_FPUc, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 2 // result ready after two cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // FSL get instruction with one register or immediate source operand and one + // destination register. The instruction takes one cycle to execute in each + // of the pipeline stages. The one source operand is read during the decode + // stage and the result is ready after the execute stage. + InstrItinData< IIC_FSLg, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 2 // result ready after two cycles + , 1 ]>, // first operand read after one cycle + + // FSL put instruction with either two register source operands or one + // register source operand and one immediate operand. There is no result + // produced by the instruction. The instruction takes one cycle to execute in + // each of the pipeline stages. The two source operands are read during the + // decode stage. + InstrItinData< IIC_FSLp, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Memory store instruction with either three register source operands or two + // register source operands and one immediate operand. There is no result + // produced by the instruction. The instruction takes one cycle to execute in + // each of the pipeline stages. 
All of the source operands are read during + // the decode stage. + InstrItinData< IIC_MEMs, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 // first operand read after one cycle + , 1 // second operand read after one cycle + , 1 ]>, // third operand read after one cycle + + // Memory load instruction with one destination register and either two + // register source operands or one register source operand and one immediate + // operand. The instruction takes one cycle to execute in each of the + // pipeline stages. All of the source operands are read during the decode + // stage and the result is ready after the writeback stage. + InstrItinData< IIC_MEMl, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 4 // result ready after four cycles + , 1 // second operand read after one cycle + , 1 ]> // third operand read after one cycle +]>; diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp index 3440521..a80744a 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.cpp +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -13,19 +13,39 @@ #include "MBlazeSubtarget.h" #include "MBlaze.h" +#include "MBlazeRegisterInfo.h" #include "MBlazeGenSubtarget.inc" #include "llvm/Support/CommandLine.h" using namespace llvm; MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS): - HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false), - HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false), - HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false), - HasMul64(false), HasSqrt(false), HasMMU(false) + HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false), + HasFPU(false), HasMul64(false), HasSqrt(false) { - std::string CPU = "v400"; - MBlazeArchVersion = V400; - // Parse features string. - ParseSubtargetFeatures(FS, CPU); + std::string CPU = "mblaze"; + CPU = ParseSubtargetFeatures(FS, CPU); + + // Only use instruction scheduling if the selected CPU has an instruction + // itinerary (the default CPU is the only one that doesn't). 
+ HasItin = CPU != "mblaze"; + DEBUG(dbgs() << "CPU " << CPU << "(" << HasItin << ")\n"); + + // Compute the issue width of the MBlaze itineraries + computeIssueWidth(); +} + +void MBlazeSubtarget::computeIssueWidth() { + InstrItins.IssueWidth = 1; +} + +bool MBlazeSubtarget:: +enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtarget::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(&MBlaze::GPRRegClass); + return HasItin && OptLevel >= CodeGenOpt::Default; } + diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h index bebb3f7..2255b28 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.h +++ b/lib/Target/MBlaze/MBlazeSubtarget.h @@ -24,29 +24,14 @@ namespace llvm { class MBlazeSubtarget : public TargetSubtarget { protected: - - enum MBlazeArchEnum { - V400, V500, V600, V700, V710 - }; - - // MBlaze architecture version - MBlazeArchEnum MBlazeArchVersion; - - bool HasPipe3; bool HasBarrel; bool HasDiv; bool HasMul; - bool HasFSL; - bool HasEFSL; - bool HasMSRSet; - bool HasException; bool HasPatCmp; bool HasFPU; - bool HasESR; - bool HasPVR; bool HasMul64; bool HasSqrt; - bool HasMMU; + bool HasItin; InstrItineraryData InstrItins; @@ -61,18 +46,26 @@ public: std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); + /// Compute the number of maximum number of issues per cycle for the + /// MBlaze scheduling itineraries. + void computeIssueWidth(); + + /// enablePostRAScheduler - True at 'More' optimization. + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; + + /// getInstrItins - Return the instruction itineraies based on subtarget. 
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + + bool hasItin() const { return HasItin; } + bool hasPCMP() const { return HasPatCmp; } bool hasFPU() const { return HasFPU; } bool hasSqrt() const { return HasSqrt; } bool hasMul() const { return HasMul; } bool hasMul64() const { return HasMul64; } bool hasDiv() const { return HasDiv; } bool hasBarrel() const { return HasBarrel; } - - bool isV400() const { return MBlazeArchVersion == V400; } - bool isV500() const { return MBlazeArchVersion == V500; } - bool isV600() const { return MBlazeArchVersion == V600; } - bool isV700() const { return MBlazeArchVersion == V700; } - bool isV710() const { return MBlazeArchVersion == V710; } }; } // End llvm namespace diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index cd949e1..df34a83 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -36,19 +36,18 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, bool RelaxAll, bool NoExecStack) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: + + if (TheTriple.isOSDarwin()) { llvm_unreachable("MBlaze does not support Darwin MACH-O format"); return NULL; - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: + } + + if (TheTriple.isOSWindows()) { llvm_unreachable("MBlaze does not support Windows COFF format"); return NULL; - default: - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, - NoExecStack); } + + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); } @@ -87,7 +86,8 @@ MBlazeTargetMachine(const Target &T, const std::string &TT, DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) { + TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()) { if (getRelocationModel() == Reloc::Default) { setRelocationModel(Reloc::Static); } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 45ad078..48ce37a 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -38,13 +38,18 @@ namespace llvm { MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; MBlazeELFWriterInfo ELFWriterInfo; + InstrItineraryData InstrItins; + public: MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &FS); virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const InstrItineraryData *getInstrItineraryData() const + { return &InstrItins; } + virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } diff --git a/lib/Target/MBlaze/TODO b/lib/Target/MBlaze/TODO index 2e613eb..317d7c0 100644 --- a/lib/Target/MBlaze/TODO +++ b/lib/Target/MBlaze/TODO @@ -9,8 +9,6 @@ needs to be examined more closely: - The stack layout needs to be examined to make sure it meets the standard, especially in regards to var arg functions. - - The processor itineraries are copied from a different backend - and need to be updated to model the MicroBlaze correctly. - Look at the MBlazeGenFastISel.inc stuff and make use of it if appropriate. @@ -18,9 +16,6 @@ There are a few things that need to be looked at: - There are some instructions that are not generated by the backend and have not been tested as far as the parser is concerned. 
- - The assembly parser does not use any MicroBlaze specific directives. + - The assembly parser does not use many MicroBlaze specific directives. I should investigate whether there are MicroBlaze specific directives and, if so, add them. - - The instruction MFS and MTS use special names for some of the - special registers that can be accessed. These special register - names should be parsed by the assembly parser. diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index f0e1ce2..63860dc 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -18,11 +18,12 @@ namespace llvm { class MCOperand; + class TargetMachine; class MSP430InstPrinter : public MCInstPrinter { public: - MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) { - } + MSP430InstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index a1a7f44..5264d68 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -164,10 +164,11 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { } static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new MSP430InstPrinter(MAI); + return new MSP430InstPrinter(TM, MAI); return 0; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index a95d59c..006785b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -515,7 +515,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag is - // necessary since all emited instructions must be stuck together. + // necessary since all emitted instructions must be stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 26df1a0..8939b0a 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(MipsGenSubtarget.inc -gen-subtarget) add_llvm_target(MipsCodeGen MipsAsmPrinter.cpp MipsDelaySlotFiller.cpp + MipsExpandPseudo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp MipsISelLowering.cpp diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index a9ab050..05b4c5a 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the entry points for global functions defined in +// This file contains the entry points for global functions defined in // the LLVM Mips back-end.
// //===----------------------------------------------------------------------===// @@ -25,6 +25,7 @@ namespace llvm { FunctionPass *createMipsISelDag(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); + FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM); extern Target TheMipsTarget; extern Target TheMipselTarget; diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 3e6437b..b79016d 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -59,7 +59,7 @@ def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", "Mips2 ISA Support">; def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32", - "Mips32 ISA Support", + "Mips32 ISA Support", [FeatureCondMov, FeatureBitCount]>; def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", "Mips32r2", "Mips32r2 ISA Support", @@ -81,7 +81,7 @@ def : Proc<"r6000", [FeatureMips2]>; def : Proc<"4ke", [FeatureMips32r2]>; -// Allegrex is a 32bit subset of r4000, both for interger and fp registers, +// Allegrex is a 32bit subset of r4000, both for integer and fp registers, // but much more similar to Mips2 than Mips3. It also contains some of // Mips32/Mips32r2 instructions and a custom vector fpu processor. def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI, diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index bd28a9b..502f744 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -30,7 +30,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" @@ -53,14 +53,14 @@ namespace { return "Mips Assembly Printer"; } - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O); void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); - void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); void printSavedRegsBitmask(raw_ostream &O); void printHex32(unsigned int Value, raw_ostream &O); @@ -77,7 +77,8 @@ namespace { } virtual void EmitFunctionBodyStart(); virtual void EmitFunctionBodyEnd(); - virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock* + MBB) const; static const char *getRegisterName(unsigned RegNo); virtual void EmitFunctionEntryLabel(); @@ -94,12 +95,12 @@ namespace { // -- Frame directive "frame Stackpointer, Stacksize, RARegister" // Describe the stack frame. // -// -- Mask directives "(f)mask bitmask, offset" +// -- Mask directives "(f)mask bitmask, offset" // Tells the assembler which registers are saved and where. 
-// bitmask - contain a little endian bitset indicating which registers are -// saved on function prologue (e.g. with a 0x80000000 mask, the +// bitmask - contains a little endian bitset indicating which registers are +// saved on function prologue (e.g. with a 0x80000000 mask, the // assembler knows the register 31 (RA) is saved at prologue). -// offset - the position before stack pointer subtraction indicating where +// offset - the position before stack pointer subtraction indicating where // the first saved register on prologue is located. (e.g. with a // // Consider the following function prologue: @@ -110,9 +111,9 @@ // sw $ra, 40($sp) // sw $fp, 36($sp) // -// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and -// 30 (FP) are saved at prologue. As the save order on prologue is from -// left to right, RA is saved first. A -8 offset means that after the +// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and +// 30 (FP) are saved at prologue. As the save order on prologue is from +// left to right, RA is saved first. A -8 offset means that after the // stack pointer subtraction, the first register in the mask (RA) will be // saved at address 48-8=40. // @@ -122,7 +123,7 @@ // Mask directives //===----------------------------------------------------------------------===// -// Create a bitmask with all callee saved registers for CPU or Floating Point +// Create a bitmask with all callee saved registers for CPU or Floating Point // registers. For CPU registers consider RA, GP and FP for saving if necessary. void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { const TargetFrameLowering *TFI = TM.getFrameLowering(); @@ -168,7 +169,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { // Print a 32 bit hex number with all digits. void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) { O << "0x"; - for (int i = 7; i >= 0; i--) + for (int i = 7; i >= 0; i--) O << utohexstr((Value & (0xF << (i*4))) >> (i*4)); } @@ -191,9 +192,9 @@ void MipsAsmPrinter::emitFrameDirective() { } /// Emit Set directives. -const char *MipsAsmPrinter::getCurrentABIString() const { +const char *MipsAsmPrinter::getCurrentABIString() const { switch (Subtarget->getTargetABI()) { - case MipsSubtarget::O32: return "abi32"; + case MipsSubtarget::O32: return "abi32"; case MipsSubtarget::O64: return "abiO64"; case MipsSubtarget::N32: return "abiN32"; case MipsSubtarget::N64: return "abi64"; @@ -203,7 +204,7 @@ const char *MipsAsmPrinter::getCurrentABIString() const { llvm_unreachable("Unknown Mips ABI"); return NULL; -} +} void MipsAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); @@ -214,7 +215,7 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { /// the first basic block in the function. void MipsAsmPrinter::EmitFunctionBodyStart() { emitFrameDirective(); - + SmallString<128> Str; raw_svector_ostream OS(Str); printSavedRegsBitmask(OS); @@ -226,7 +227,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() { void MipsAsmPrinter::EmitFunctionBodyEnd() { // There are instructions for these macros, but they must // always be at the function end, and we can't emit and - // break with BB logic.
OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); OutStreamer.EmitRawText(StringRef("\t.set\treorder")); OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); } @@ -236,8 +237,8 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { /// isBlockOnlyReachableByFallthrough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. -bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) - const { +bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* + MBB) const { // The predecessor has to be immediately before this block. const MachineBasicBlock *Pred = *MBB->pred_begin(); @@ -246,16 +247,41 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock * if (const BasicBlock *bb = Pred->getBasicBlock()) if (isa<SwitchInst>(bb->getTerminator())) return false; + + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; - return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); + // Otherwise, check the last instruction. + // Check if the last terminator is an unconditional branch. + MachineBasicBlock::const_iterator I = Pred->end(); + while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ; + + return !I->getDesc().isBarrier(); } // Print out an operand for an inline asm expression. -bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, +bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) + if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier.
printOperand(MI, OpNo, O); @@ -273,22 +299,9 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, switch(MO.getTargetFlags()) { case MipsII::MO_GPREL: O << "%gp_rel("; break; case MipsII::MO_GOT_CALL: O << "%call16("; break; - case MipsII::MO_GOT: { - const MachineOperand &LastMO = MI->getOperand(opNum-1); - bool LastMOIsGP = LastMO.getType() == MachineOperand::MO_Register - && LastMO.getReg() == Mips::GP; - if (MI->getOpcode() == Mips::LW || LastMOIsGP) - O << "%got("; - else - O << "%lo("; - break; - } - case MipsII::MO_ABS_HILO: - if (MI->getOpcode() == Mips::LUi) - O << "%hi("; - else - O << "%lo("; - break; + case MipsII::MO_GOT: O << "%got("; break; + case MipsII::MO_ABS_HI: O << "%hi("; break; + case MipsII::MO_ABS_LO: O << "%lo("; break; } switch (MO.getType()) { @@ -308,6 +321,12 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << *Mang->getSymbol(MO.getGlobal()); break; + case MachineOperand::MO_BlockAddress: { + MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress()); + O << BA->getName(); + break; + } + case MachineOperand::MO_ExternalSymbol: O << *GetExternalSymbolSymbol(MO.getSymbolName()); break; @@ -323,7 +342,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, if (MO.getOffset()) O << "+" << MO.getOffset(); break; - + default: llvm_unreachable("<unknown operand type>"); } @@ -336,7 +355,7 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, const MachineOperand &MO = MI->getOperand(opNum); if (MO.isImm()) O << (unsigned short int)MO.getImm(); - else + else printOperand(MI, opNum, O); } @@ -352,8 +371,8 @@ printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, return; } - // Load/Store memory operands -- imm($reg) - // If PIC target the target is loaded as the + // Load/Store memory operands -- imm($reg) + // If PIC target the target is loaded as the // pattern lw $25,%call16($28) printOperand(MI, opNum, O); O << "("; @@ -365,12 +384,12 @@ void MipsAsmPrinter:: printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier) { const MachineOperand& MO = MI->getOperand(opNum); - O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); + O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); } void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: Use SwitchSection. - + // Tell the assembler which ABI we are using OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString())); @@ -383,11 +402,11 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { } // return to previous section - OutStreamer.EmitRawText(StringRef("\t.previous")); + OutStreamer.EmitRawText(StringRef("\t.previous")); } // Force static initialization. -extern "C" void LLVMInitializeMipsAsmPrinter() { +extern "C" void LLVMInitializeMipsAsmPrinter() { RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget); RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget); } diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 8f313ef..57aeb1d 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -1,23 +1,23 @@ //===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // This describes the calling conventions for Mips architecture. 
//===----------------------------------------------------------------------===// /// CCIfSubtarget - Match if the current subtarget has a feature F. -class CCIfSubtarget<string F, CCAction A>: +class CCIfSubtarget<string F, CCAction A>: CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>; //===----------------------------------------------------------------------===// // Mips O32 Calling Convention //===----------------------------------------------------------------------===// -// Only the return rules are defined here for O32. The rules for argument +// Only the return rules are defined here for O32. The rules for argument // passing are defined in MipsISelLowering.cpp. def RetCC_MipsO32 : CallingConv<[ // i32 are returned in registers V0, V1 @@ -41,15 +41,15 @@ def CC_MipsEABI : CallingConv<[ // Integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>, - // Single fp arguments are passed in pairs within 32-bit mode - CCIfType<[f32], CCIfSubtarget<"isSingleFloat()", + // Single fp arguments are passed in pairs within 32-bit mode + CCIfType<[f32], CCIfSubtarget<"isSingleFloat()", CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>, - CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()", + CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[F12, F14, F16, F18]>>>, - // The first 4 doubl fp arguments are passed in single fp registers. - CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", + // The first 4 double fp arguments are passed in single fp registers. + CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D6, D7, D8, D9]>>>, // Integer values get stored in stack slots that are 4 bytes in diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp new file mode 100644 index 0000000..4423f51 --- /dev/null +++ b/lib/Target/Mips/MipsExpandPseudo.cpp @@ -0,0 +1,117 @@ +//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands pseudo instructions into target instructions after register +// allocation but before post-RA scheduling. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-expand-pseudo" + +#include "Mips.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +namespace { + struct MipsExpandPseudo : public MachineFunctionPass { + + TargetMachine &TM; + const TargetInstrInfo *TII; + + static char ID; + MipsExpandPseudo(TargetMachine &tm) + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } + + virtual const char *getPassName() const { + return "Mips PseudoInstrs Expansion"; + } + + bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + + private: + void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator); + void ExpandExtractElementF64(MachineBasicBlock&, + MachineBasicBlock::iterator); + }; + char MipsExpandPseudo::ID = 0; +} // end of anonymous namespace + +bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) { + bool Changed = false; + + for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I) + Changed |= runOnMachineBasicBlock(*I); + + return Changed; +} + +bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) { + + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { + const TargetInstrDesc& Tid = I->getDesc(); + + switch(Tid.getOpcode()) { + default: + ++I; + continue; + case Mips::BuildPairF64: + ExpandBuildPairF64(MBB, I); + break; + case Mips::ExtractElementF64: + ExpandExtractElementF64(MBB, I); + break; + } + + // delete original instr + MBB.erase(I++); + Changed = true; + } + + return Changed; +} + +void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB, + MachineBasicBlock::iterator I) { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); + const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); + DebugLoc dl = I->getDebugLoc(); + const unsigned* SubReg = + TM.getRegisterInfo()->getSubRegisters(DstReg); + + // mtc1 Lo, $fp + // mtc1 Hi, $fp + 1 + BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg); + BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg); +} + +void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB, + MachineBasicBlock::iterator I) { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned SrcReg = I->getOperand(1).getReg(); + unsigned N = I->getOperand(2).getImm(); + const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); + DebugLoc dl = I->getDebugLoc(); + const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg); + + BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N)); +} + +/// createMipsExpandPseudoPass - Returns a pass that expands pseudo +/// instrs into real instrs. +FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) { + return new MipsExpandPseudo(tm); +} diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 87a097a..21e3314 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -203,6 +203,46 @@ void MipsFrameLowering::adjustMipsStackFrame(MachineFunction &MF) const { MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset); } + +// Expand a pair of register and immediate if the immediate doesn't fit in the +// 16-bit offset field. +// e.g.
+// if OrigImm = 0x10000, OrigReg = $sp: +// generate the following sequence of instrs: +// lui $at, hi(0x10000) +// addu $at, $sp, $at +// +// (NewReg, NewImm) = ($at, lo(0x10000)) +// return true +static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm, + unsigned& NewReg, int& NewImm, + MachineBasicBlock& MBB, + MachineBasicBlock::iterator I) { + // OrigImm fits in the 16-bit field + if (OrigImm < 0x8000 && OrigImm >= -0x8000) { + NewReg = OrigReg; + NewImm = OrigImm; + return false; + } + + MachineFunction* MF = MBB.getParent(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + DebugLoc DL = I->getDebugLoc(); + int ImmLo = OrigImm & 0xffff; + int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) + + ((OrigImm & 0x8000) != 0); + + // FIXME: change this when mips goes MC. + BuildMI(MBB, I, DL, TII->get(Mips::NOAT)); + BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi); + BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg) + .addReg(Mips::AT); + NewReg = Mips::AT; + NewImm = ImmLo; + + return true; +} + void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -214,6 +254,9 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_); + unsigned NewReg = 0; + int NewImm = 0; + bool ATUsed; // Get the right frame order for Mips. adjustMipsStackFrame(MF); @@ -236,22 +279,40 @@ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); // Adjust stack : addi sp, sp, (-imm) + ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(Mips::SP).addImm(-StackSize); + .addReg(NewReg).addImm(NewImm); + + // FIXME: change this when mips goes MC. + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); - // Save the return address only if the function isnt a leaf one. + // Save the return address only if the function isn't a leaf one. // sw $ra, stack_loc($sp) if (MFI->adjustsStack()) { + ATUsed = expandRegLargeImmPair(Mips::SP, RAOffset, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) - .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP); + .addReg(Mips::RA).addImm(NewImm).addReg(NewReg); + + // FIXME: change this when mips goes MC. + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); } // if framepointer enabled, save it and set it // to point to the stack pointer if (hasFP(MF)) { // sw $fp,stack_loc($sp) + ATUsed = expandRegLargeImmPair(Mips::SP, FPOffset, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) - .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP); + .addReg(Mips::FP).addImm(NewImm).addReg(NewReg); + + // FIXME: change this when mips goes MC.
+ if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); // move $fp, $sp BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP) @@ -280,6 +341,10 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, int FPOffset = MipsFI->getFPStackOffset(); int RAOffset = MipsFI->getRAStackOffset(); + unsigned NewReg = 0; + int NewImm = 0; + bool ATUsed = false; + // if framepointer enabled, restore it and restore the // stack pointer if (hasFP(MF)) { @@ -288,21 +353,39 @@ .addReg(Mips::FP).addReg(Mips::ZERO); // lw $fp,stack_loc($sp) + ATUsed = expandRegLargeImmPair(Mips::SP, FPOffset, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP) - .addImm(FPOffset).addReg(Mips::SP); + .addImm(NewImm).addReg(NewReg); + + // FIXME: change this when mips goes MC. + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); } - // Restore the return address only if the function isnt a leaf one. + // Restore the return address only if the function isn't a leaf one. // lw $ra, stack_loc($sp) if (MFI->adjustsStack()) { + ATUsed = expandRegLargeImmPair(Mips::SP, RAOffset, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA) - .addImm(RAOffset).addReg(Mips::SP); + .addImm(NewImm).addReg(NewReg); + + // FIXME: change this when mips goes MC. + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); } // adjust stack : insert addi sp, sp, (imm) if (NumBytes) { + ATUsed = expandRegLargeImmPair(Mips::SP, NumBytes, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(Mips::SP).addImm(NumBytes); + .addReg(NewReg).addImm(NewImm); + + // FIXME: change this when mips goes MC. + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); } } diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index a8426c1..34647df 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ALPHA_FRAMEINFO_H -#define ALPHA_FRAMEINFO_H +#ifndef MIPS_FRAMEINFO_H +#define MIPS_FRAMEINFO_H #include "Mips.h" #include "MipsSubtarget.h" diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 755e04d..0382964 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -52,19 +52,19 @@ class MipsDAGToDAGISel : public SelectionDAGISel { /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can /// make the right decision when generating code for different targets. const MipsSubtarget &Subtarget; - + public: explicit MipsDAGToDAGISel(MipsTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - + // Pass Name virtual const char *getPassName() const { return "MIPS DAG->DAG Pattern Instruction Selection"; - } - + } -private: + +private: // Include the pieces autogenerated from the target description.
#include "MipsGenDAGISel.inc" @@ -116,12 +116,14 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } - + // on PIC code Load GA if (TM.getRelocationModel() == Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || - (Addr.getOpcode() == ISD::TargetConstantPool) || - (Addr.getOpcode() == ISD::TargetJumpTable)){ + if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || + (Addr.getOpcode() == ISD::TargetConstantPool) || + (Addr.getOpcode() == ISD::TargetJumpTable) || + (Addr.getOpcode() == ISD::TargetBlockAddress) || + (Addr.getOpcode() == ISD::TargetExternalSymbol)) { Base = CurDAG->getRegister(Mips::GP, MVT::i32); Offset = Addr; return true; @@ -130,8 +132,8 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { if ((Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; - } - + } + // Operand is a result from an ADD. if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { @@ -158,10 +160,10 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) - if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi || + if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi || Addr.getOperand(0).getOpcode() == ISD::LOAD) && Addr.getOperand(1).getOpcode() == MipsISD::Lo) { - SDValue LoVal = Addr.getOperand(1); + SDValue LoVal = Addr.getOperand(1); if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) { Base = Addr.getOperand(0); Offset = LoVal.getOperand(0); @@ -176,7 +178,7 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { } SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { - MVT::SimpleValueType NVT = + MVT::SimpleValueType NVT = N->getValueType(0).getSimpleVT().SimpleTy; if (!Subtarget.isMips1() || NVT != MVT::f64) @@ -199,14 +201,14 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); DebugLoc dl = N->getDebugLoc(); - // The second load should start after for 4 bytes. + // The second load should start after for 4 bytes. 
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0)) - Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(), - MVT::i32, - CP->getAlignment(), - CP->getOffset()+4, + Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(), + MVT::i32, + CP->getAlignment(), + CP->getOffset()+4, CP->getTargetFlags()); else return NULL; @@ -220,16 +222,16 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { // Generate: // lwc $f0, X($3) // lwc $f1, X+4($3) - SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, + SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, MVT::Other, Offset0, Base, Chain); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(LD0, 0)); SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, MVT::Other, Offset1, Base, SDValue(LD0, 1)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(LD1, 0)); ReplaceUses(SDValue(N, 0), I1); @@ -241,7 +243,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { - if (!Subtarget.isMips1() || + if (!Subtarget.isMips1() || N->getOperand(1).getValueType() != MVT::f64) return NULL; @@ -265,12 +267,12 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { DebugLoc dl = N->getDebugLoc(); // Get the even and odd part from the f64 register - SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, + SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, dl, MVT::f32, N1); SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven, dl, MVT::f32, N1); - // The second store should start after for 4 bytes. + // The second store should start after for 4 bytes. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); else @@ -315,26 +317,26 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { } /// - // Instruction Selection not handled by the auto-generated + // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. 
- /// + /// switch(Opcode) { default: break; - case ISD::SUBE: + case ISD::SUBE: case ISD::ADDE: { SDValue InFlag = Node->getOperand(2), CmpLHS; unsigned Opc = InFlag.getOpcode(); (void)Opc; - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); unsigned MOp; if (Opcode == ISD::ADDE) { CmpLHS = InFlag.getValue(0); MOp = Mips::ADDu; - } else { + } else { CmpLHS = InFlag.getOperand(0); MOp = Mips::SUBu; } @@ -346,7 +348,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { EVT VT = LHS.getValueType(); SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2); - SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, SDValue(Carry,0), RHS); return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, @@ -356,36 +358,34 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Mul/Div with two results case ISD::SDIVREM: case ISD::UDIVREM: + break; case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue Op1 = Node->getOperand(0); SDValue Op2 = Node->getOperand(1); unsigned Op; - if (Opcode == ISD::UMUL_LOHI || Opcode == ISD::SMUL_LOHI) - Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - else - Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV); + Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); + SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); - SDValue InFlag = SDValue(MulDiv, 0); - SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, + SDValue InFlag = SDValue(Mul, 0); + SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, MVT::Glue, InFlag); InFlag = SDValue(Lo,1); SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); - if (!SDValue(Node, 0).use_empty()) + if (!SDValue(Node, 0).use_empty()) ReplaceUses(SDValue(Node, 0), SDValue(Lo,0)); - if (!SDValue(Node, 1).use_empty()) + if (!SDValue(Node, 1).use_empty()) ReplaceUses(SDValue(Node, 1), SDValue(Hi,0)); return NULL; } /// Special Muls - case ISD::MUL: + case ISD::MUL: if (Subtarget.isMips32()) break; case ISD::MULHS: @@ -394,7 +394,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue MulOp2 = Node->getOperand(1); unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, + SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, MVT::Glue, MulOp1, MulOp2); SDValue InFlag = SDValue(MulNode, 0); @@ -408,24 +408,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Div/Rem operations case ISD::SREM: case ISD::UREM: - case ISD::SDIV: - case ISD::UDIV: { - SDValue Op1 = Node->getOperand(0); - SDValue Op2 = Node->getOperand(1); - - unsigned Op, MOp; - if (Opcode == ISD::SDIV || Opcode == ISD::UDIV) { - Op = (Opcode == ISD::SDIV ? Mips::DIV : Mips::DIVu); - MOp = Mips::MFLO; - } else { - Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu); - MOp = Mips::MFHI; - } - SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); - - SDValue InFlag = SDValue(Node, 0); - return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag); - } + case ISD::SDIV: + case ISD::UDIV: + break; // Get target GOT address. 
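The ADDE/SUBE selection above materializes the carry explicitly because MIPS has no flags register: SLTu yields 1 exactly when the low-word addition wrapped, and that bit is folded into the high word. The same idiom in plain C++ (a sketch, not LLVM code):

#include <cassert>
#include <cstdint>

// 64-bit addition from 32-bit halves; the carry is recovered with
// an unsigned compare, which is exactly what SLTu computes.
static void add64(uint32_t aLo, uint32_t aHi, uint32_t bLo, uint32_t bHi,
                  uint32_t &sumLo, uint32_t &sumHi) {
  sumLo = aLo + bLo;                // ADDu
  uint32_t carry = sumLo < aLo;     // SLTu: 1 iff the low add wrapped
  sumHi = aHi + bHi + carry;        // the ADDE half of the pair
}

int main() {
  uint32_t lo, hi;
  add64(0xffffffffu, 0, 1, 0, lo, hi);
  assert(lo == 0 && hi == 1);
  return 0;
}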
case ISD::GLOBAL_OFFSET_TABLE: @@ -433,15 +418,15 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { case ISD::ConstantFP: { ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); - if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, Mips::ZERO, MVT::i32); SDValue Undef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(MTC, 0)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(MTC, 0)); ReplaceUses(SDValue(Node, 0), I1); return I1.getNode(); @@ -460,61 +445,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return ResNode; // Other cases are autogenerated. break; - - /// Handle direct and indirect calls when using PIC. On PIC, when - /// GOT is smaller than about 64k (small code) the GA target is - /// loaded with only one instruction. Otherwise GA's target must - /// be loaded with 3 instructions. - case MipsISD::JmpLink: { - if (TM.getRelocationModel() == Reloc::PIC_) { - unsigned LastOpNum = Node->getNumOperands()-1; - - SDValue Chain = Node->getOperand(0); - SDValue Callee = Node->getOperand(1); - SDValue InFlag; - - // Skip the incomming flag if present - if (Node->getOperand(LastOpNum).getValueType() == MVT::Glue) - LastOpNum--; - - if ( (isa<GlobalAddressSDNode>(Callee)) || - (isa<ExternalSymbolSDNode>(Callee)) ) - { - /// Direct call for global addresses and external symbols - SDValue GPReg = CurDAG->getRegister(Mips::GP, MVT::i32); - - // Use load to get GOT target - SDValue Ops[] = { Callee, GPReg, Chain }; - SDValue Load = SDValue(CurDAG->getMachineNode(Mips::LW, dl, MVT::i32, - MVT::Other, Ops, 3), 0); - Chain = Load.getValue(1); - - // Call target must be on T9 - Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Load, InFlag); - } else - /// Indirect call - Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Callee, InFlag); - - // Map the JmpLink operands to JALR - SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue); - SmallVector<SDValue, 8> Ops; - Ops.push_back(CurDAG->getRegister(Mips::T9, MVT::i32)); - - for (unsigned i = 2, e = LastOpNum+1; i != e; ++i) - Ops.push_back(Node->getOperand(i)); - Ops.push_back(Chain); - Ops.push_back(Chain.getValue(1)); - - // Emit Jump and Link Register - SDNode *ResNode = CurDAG->getMachineNode(Mips::JALR, dl, NodeTys, - &Ops[0], Ops.size()); - - // Replace Chain and InFlag - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 1)); - return ResNode; - } - } } // Select the default instruction @@ -529,7 +459,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return ResNode; } -/// createMipsISelDag - This pass converts a legalized DAG into a +/// createMipsISelDag - This pass converts a legalized DAG into a /// MIPS-specific DAG, ready for instruction scheduling. 
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) { return new MipsDAGToDAGISel(TM); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 1d7a1c0..1f1220f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -41,15 +41,19 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::Lo : return "MipsISD::Lo"; case MipsISD::GPRel : return "MipsISD::GPRel"; case MipsISD::Ret : return "MipsISD::Ret"; - case MipsISD::SelectCC : return "MipsISD::SelectCC"; - case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC"; case MipsISD::FPBrcond : return "MipsISD::FPBrcond"; case MipsISD::FPCmp : return "MipsISD::FPCmp"; + case MipsISD::CMovFP_T : return "MipsISD::CMovFP_T"; + case MipsISD::CMovFP_F : return "MipsISD::CMovFP_F"; case MipsISD::FPRound : return "MipsISD::FPRound"; case MipsISD::MAdd : return "MipsISD::MAdd"; case MipsISD::MAddu : return "MipsISD::MAddu"; case MipsISD::MSub : return "MipsISD::MSub"; case MipsISD::MSubu : return "MipsISD::MSubu"; + case MipsISD::DivRem : return "MipsISD::DivRem"; + case MipsISD::DivRemU : return "MipsISD::DivRemU"; + case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; + case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; default : return NULL; } } @@ -89,25 +93,22 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); - - // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors' - // with operands comming from setcc fp comparions. This is necessary since - // the result from these setcc are in a flag registers (FCR31). - setOperationAction(ISD::AND, MVT::i32, Custom); - setOperationAction(ISD::OR, MVT::i32, Custom); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); // Operations not directly supported by Mips. 
setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -129,7 +130,9 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); @@ -139,6 +142,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -160,6 +167,9 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::ADDE); setTargetDAGCombine(ISD::SUBE); + setTargetDAGCombine(ISD::SDIVREM); + setTargetDAGCombine(ISD::UDIVREM); + setTargetDAGCombine(ISD::SETCC); setStackPointerRegisterToSaveRestore(Mips::SP); computeRegisterProperties(); @@ -181,7 +191,7 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const { // multHi/Lo: product of multiplication // Lo0: initial value of Lo register // Hi0: initial value of Hi register -// Return true if mattern matching was successful. +// Return true if pattern matching was successful. static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // ADDENode's second operand must be a flag output of an ADDC node in order // for the matching to be successful. @@ -255,7 +265,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // multHi/Lo: product of multiplication // Lo0: initial value of Lo register // Hi0: initial value of Hi register -// Return true if mattern matching was successful. +// Return true if pattern matching was successful. static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { // SUBENode's second operand must be a flag output of an SUBC node in order // for the matching to be successful. @@ -346,6 +356,130 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, return SDValue(); } +static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + unsigned opc = N->getOpcode() == ISD::SDIVREM ? 
MipsISD::DivRem : + MipsISD::DivRemU; + DebugLoc dl = N->getDebugLoc(); + + SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue, + N->getOperand(0), N->getOperand(1)); + SDValue InChain = DAG.getEntryNode(); + SDValue InGlue = DivRem; + + // insert MFLO + if (N->hasAnyUseOfValue(0)) { + SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32, + InGlue); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); + InChain = CopyFromLo.getValue(1); + InGlue = CopyFromLo.getValue(2); + } + + // insert MFHI + if (N->hasAnyUseOfValue(1)) { + SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl, + Mips::HI, MVT::i32, InGlue); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); + } + + return SDValue(); +} + +static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { + switch (CC) { + default: llvm_unreachable("Unknown fp condition code!"); + case ISD::SETEQ: + case ISD::SETOEQ: return Mips::FCOND_OEQ; + case ISD::SETUNE: return Mips::FCOND_UNE; + case ISD::SETLT: + case ISD::SETOLT: return Mips::FCOND_OLT; + case ISD::SETGT: + case ISD::SETOGT: return Mips::FCOND_OGT; + case ISD::SETLE: + case ISD::SETOLE: return Mips::FCOND_OLE; + case ISD::SETGE: + case ISD::SETOGE: return Mips::FCOND_OGE; + case ISD::SETULT: return Mips::FCOND_ULT; + case ISD::SETULE: return Mips::FCOND_ULE; + case ISD::SETUGT: return Mips::FCOND_UGT; + case ISD::SETUGE: return Mips::FCOND_UGE; + case ISD::SETUO: return Mips::FCOND_UN; + case ISD::SETO: return Mips::FCOND_OR; + case ISD::SETNE: + case ISD::SETONE: return Mips::FCOND_ONE; + case ISD::SETUEQ: return Mips::FCOND_UEQ; + } +} + + +// Returns true if condition code has to be inverted. +static bool InvertFPCondCode(Mips::CondCode CC) { + if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) + return false; + + if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) + return true; + + assert(false && "Illegal Condition Code"); + return false; +} + +// Creates and returns an FPCmp node from a setcc node. +// Returns Op if setcc is not a floating point comparison. +static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { + // must be a SETCC node + if (Op.getOpcode() != ISD::SETCC) + return Op; + + SDValue LHS = Op.getOperand(0); + + if (!LHS.getValueType().isFloatingPoint()) + return Op; + + SDValue RHS = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + + // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of + // node if necessary. + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + + return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS, + DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); +} + +// Creates and returns a CMovFPT/F node. +static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, + SDValue False, DebugLoc DL) { + bool invert = InvertFPCondCode((Mips::CondCode) + cast<ConstantSDNode>(Cond.getOperand(2)) + ->getSExtValue()); + + return DAG.getNode((invert ? 
MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL, + True.getValueType(), True, False, Cond); +} + +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0)); + + if (Cond.getOpcode() != MipsISD::FPCmp) + return SDValue(); + + SDValue True = DAG.getConstant(1, MVT::i32); + SDValue False = DAG.getConstant(0, MVT::i32); + + return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc()); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -357,6 +491,11 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return PerformADDECombine(N, DAG, DCI, Subtarget); case ISD::SUBE: return PerformSUBECombine(N, DAG, DCI, Subtarget); + case ISD::SDIVREM: + case ISD::UDIVREM: + return PerformDivRemCombine(N, DAG, DCI, Subtarget); + case ISD::SETCC: + return PerformSETCCCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -367,17 +506,15 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::AND: return LowerANDOR(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::OR: return LowerANDOR(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); } return SDValue(); @@ -410,122 +547,110 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { return Mips::BRANCH_INVALID; } -static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) { - switch(BC) { - default: - llvm_unreachable("Unknown branch code"); - case Mips::BRANCH_T : return Mips::BC1T; - case Mips::BRANCH_F : return Mips::BC1F; - case Mips::BRANCH_TL : return Mips::BC1TL; - case Mips::BRANCH_FL : return Mips::BC1FL; - } -} - -static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { - switch (CC) { - default: llvm_unreachable("Unknown fp condition code!"); - case ISD::SETEQ: - case ISD::SETOEQ: return Mips::FCOND_EQ; - case ISD::SETUNE: return Mips::FCOND_OGL; - case ISD::SETLT: - case ISD::SETOLT: return Mips::FCOND_OLT; - case ISD::SETGT: - case ISD::SETOGT: return Mips::FCOND_OGT; - case ISD::SETLE: - case ISD::SETOLE: return Mips::FCOND_OLE; - case ISD::SETGE: - case ISD::SETOGE: return Mips::FCOND_OGE; - case ISD::SETULT: return Mips::FCOND_ULT; - case ISD::SETULE: return Mips::FCOND_ULE; - case ISD::SETUGT: return Mips::FCOND_UGT; - case ISD::SETUGE: return Mips::FCOND_UGE; - case ISD::SETUO: return Mips::FCOND_UN; - case ISD::SETO: return Mips::FCOND_OR; - case ISD::SETNE: - case ISD::SETONE: return Mips::FCOND_NEQ; - case ISD::SETUEQ: return Mips::FCOND_UEQ; - } -} - MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + // There is no need to expand CMov instructions if target has + // conditional moves. 
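PerformDivRemCombine shown earlier exploits the fact that a single MIPS DIV or DIVU produces both results at once, the quotient in LO and the remainder in HI, and MFLO/MFHI copies are emitted only for the values actually used. A C++ analogue of one division feeding both consumers (illustrative only):

#include <cassert>
#include <cstdlib>  // std::div

int main() {
  // One divide produces both values, like DIV writing the quotient
  // to LO and the remainder to HI; MFLO/MFHI read what is needed.
  std::div_t r = std::div(17, 5);
  assert(r.quot == 3 && r.rem == 2);
  return 0;
}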
+ if (Subtarget->hasCondMov()) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); bool isFPCmp = false; DebugLoc dl = MI->getDebugLoc(); + unsigned Opc; switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); - case Mips::Select_FCC: - case Mips::Select_FCC_S32: - case Mips::Select_FCC_D32: - isFPCmp = true; // FALL THROUGH - case Mips::Select_CC: - case Mips::Select_CC_S32: - case Mips::Select_CC_D32: { - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - // Emit the right instruction according to the type of the operands compared - if (isFPCmp) { - // Find the condiction code present in the setcc operation. - Mips::CondCode CC = (Mips::CondCode)MI->getOperand(4).getImm(); - // Get the branch opcode from the branch code. - unsigned Opc = FPBranchCodeToOpc(GetFPBranchCodeFromCond(CC)); - BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); - } else - BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg()) - .addReg(Mips::ZERO).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; + case Mips::MOVT: + case Mips::MOVT_S: + case Mips::MOVT_D: + isFPCmp = true; + Opc = Mips::BC1F; + break; + case Mips::MOVF: + case Mips::MOVF_S: + case Mips::MOVF_D: + isFPCmp = true; + Opc = Mips::BC1T; + break; + case Mips::MOVZ_I: + case Mips::MOVZ_S: + case Mips::MOVZ_D: + Opc = Mips::BNE; + break; + case Mips::MOVN_I: + case Mips::MOVN_S: + case Mips::MOVN_D: + Opc = Mips::BEQ; + break; + } + + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... 
+ // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // Emit the right instruction according to the type of the operands compared + if (isFPCmp) + BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); + else + BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg()) + .addReg(Mips::ZERO).addMBB(sinkMBB); + + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + if (isFPCmp) BuildMI(*BB, BB->begin(), dl, TII->get(Mips::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB); + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB); + else + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - } - } + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; } //===----------------------------------------------------------------------===// @@ -590,27 +715,6 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const } SDValue MipsTargetLowering:: -LowerANDOR(SDValue Op, SelectionDAG &DAG) const -{ - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - if (LHS.getOpcode() != MipsISD::FPCmp || RHS.getOpcode() != MipsISD::FPCmp) - return Op; - - SDValue True = DAG.getConstant(1, MVT::i32); - SDValue False = DAG.getConstant(0, MVT::i32); - - SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - LHS, True, False, LHS.getOperand(2)); - SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - RHS, True, False, RHS.getOperand(2)); - - return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL); -} - -SDValue MipsTargetLowering:: LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is @@ -619,58 +723,32 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const SDValue Dest = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp) + SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1)); + + // Return if flag is not set by a floating point comparison. 
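The diamond built above is the branchy equivalent of the conditional move being expanded: fall through into the false-value block or branch around it, then merge with a PHI. As straight-line C++, the resulting control flow looks roughly like this (a sketch of the shape, not of the MachineInstr API):

#include <cassert>

// The shape EmitInstrWithCustomInserter builds: a branch picks the
// path, and the PHI merge is modeled by the single variable 'result'.
static int selectDiamond(bool cond, int trueVal, int falseVal) {
  int result = trueVal;   // thisMBB defines %TrueValue
  if (!cond)              // the BC1x/BNE/BEQ chooses the path
    result = falseVal;    // copy0MBB defines %FalseValue
  return result;          // sinkMBB's PHI merges both definitions
}

int main() {
  assert(selectDiamond(true, 1, 2) == 1);
  assert(selectDiamond(false, 1, 2) == 2);
  return 0;
}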
+ if (CondRes.getOpcode() != MipsISD::FPCmp) return Op; - SDValue CondRes = Op.getOperand(1); SDValue CCNode = CondRes.getOperand(2); Mips::CondCode CC = (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue(); SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32); return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode, - Dest, CondRes); -} - -SDValue MipsTargetLowering:: -LowerSETCC(SDValue Op, SelectionDAG &DAG) const -{ - // The operands to this are the left and right operands to compare (ops #0, - // and #1) and the condition code to compare them with (op #2) as a - // CondCodeSDNode. - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - - return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS, - DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); + Dest, CondRes); } SDValue MipsTargetLowering:: LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDValue Cond = Op.getOperand(0); - SDValue True = Op.getOperand(1); - SDValue False = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0)); - // if the incomming condition comes from a integer compare, the select - // operation must be SelectCC or a conditional move if the subtarget - // supports it. - if (Cond.getOpcode() != MipsISD::FPCmp) { - if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint()) - return Op; - return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(), - Cond, True, False); - } + // Return if flag is not set by a floating point comparison. + if (Cond.getOpcode() != MipsISD::FPCmp) + return Op; - // if the incomming condition comes from fpcmp, the select - // operation must use FPSelectCC. - SDValue CCNode = Cond.getOperand(2); - return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - Cond, True, False, CCNode); + return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2), + Op.getDebugLoc()); } SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, @@ -693,12 +771,13 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); } // %hi/%lo relocation - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_ABS_HILO); - SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA); + SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_HI); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_LO); + SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); - } else { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GOT); @@ -707,9 +786,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. 
- if (!GV->hasLocalLinkage() || isa<Function>(GV)) + if (!GV->hasInternalLinkage() && + (!GV->hasLocalLinkage() || isa<Function>(GV))) return ResNode; - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo); } @@ -717,6 +799,34 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return SDValue(0,0); } +SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + // %hi/%lo relocation + SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_HI); + SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_LO); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo); + return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); + } + + SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_GOT); + SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_LO); + SDValue Load = DAG.getLoad(MVT::i32, dl, + DAG.getEntryNode(), BAGOTOffset, + MachinePointerInfo(), false, false, 0); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset); + return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); +} + SDValue MipsTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -732,7 +842,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HILO; + unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI; EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); @@ -747,7 +857,9 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const MachinePointerInfo(), false, false, 0); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); + SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); return ResNode; @@ -764,7 +876,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const // gp_rel relocation // FIXME: we should reference the constant pool using small data sections, - // but the asm printer currently doens't support this feature without + // but the asm printer currently doesn't support this feature without // hacking it. This feature should come soon so we can uncomment the // stuff below. 
//if (IsInSmallSection(C->getType())) { @@ -773,18 +885,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_ABS_HILO); - SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); + SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_HI); + SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_LO); + SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_GOT); + N->getOffset(), MipsII::MO_GOT); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), false, false, 0); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); + SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -937,46 +1053,28 @@ static bool CC_MipsO32_VarArgs(unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::AExt; } - if (ValVT == MVT::i32 || ValVT == MVT::f32) { - if (unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); - return false; - } - unsigned Off = State.AllocateStack(4, 4); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); - return false; - } - - unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize); - if (ValVT == MVT::f64) { - if (IntRegs[UnallocIntReg] == (unsigned (Mips::A1))) { - // A1 can't be used anymore, because 64 bit arguments - // must be aligned when copied back to the caller stack - State.AllocateReg(IntRegs, IntRegsSize); - UnallocIntReg++; - } - - if (IntRegs[UnallocIntReg] == (unsigned (Mips::A0)) || - IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) { - unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize); - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); - // Shadow the next register so it can be used - // later to get the other 32bit part. - State.AllocateReg(IntRegs, IntRegsSize); - return false; - } + unsigned Reg; - // Register is shadowed to preserve alignment, and the - // argument goes to a stack location. 
- if (UnallocIntReg != IntRegsSize)
- State.AllocateReg(IntRegs, IntRegsSize);
+ if (ValVT == MVT::i32 || ValVT == MVT::f32) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT == MVT::f64) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg == Mips::A1 || Reg == Mips::A3)
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else
+ llvm_unreachable("Cannot handle this ValVT.");
- unsigned Off = State.AllocateStack(8, 8);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo));
- return false;
- }
+ if (!Reg) {
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ } else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return true; // CC didn't match
+ return false; // CC must always match
}
//===----------------------------------------------------------------------===//
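The rewritten convention above encodes the O32 rule that an f64 argument occupies an aligned register pair: if allocation would start at A1 or A3, that register is skipped so the value lands in (A0,A1) or (A2,A3), and the partner register is shadowed. A toy allocator demonstrating the skip (names and types are illustrative, not the LLVM API):

#include <cassert>

enum Reg { A0, A1, A2, A3, NoReg };

struct ArgState {
  int next = A0;                      // next unallocated register
  int allocReg() { return next <= A3 ? next++ : NoReg; }
};

// f64 must start in an even register and takes two slots.
static int allocF64(ArgState &S) {
  int reg = S.allocReg();
  if (reg == A1 || reg == A3)         // skip the odd register...
    reg = S.allocReg();               // ...so the pair is aligned
  S.allocReg();                       // shadow the second half
  return reg;                         // NoReg means: assign to stack
}

int main() {
  ArgState S;
  assert(S.allocReg() == A0);         // an i32 lands in A0
  assert(allocF64(S) == A2);          // f64 skips A1, takes (A2,A3)
  return 0;
}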
@@ -1043,11 +1141,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
- DAG.getConstant(0, getPointerTy()));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
- DAG.getConstant(1, getPointerTy()));
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(1, MVT::i32));
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
continue;
}
@@ -1100,7 +1199,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emited instructions must be
+ // The InFlag is necessary since all emitted instructions must be
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
@@ -1113,12 +1212,52 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
unsigned char OpFlag = IsPIC ?
MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- getPointerTy(), 0, OpFlag);
- else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ bool LoadSymAddr = false;
+ SDValue CalleeLo;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (IsPIC && G->getGlobal()->hasInternalLinkage()) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, MipsII::MO_GOT);
+ CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
+ 0, MipsII::MO_ABS_LO);
+ } else {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, OpFlag);
+ }
+
+ LoadSymAddr = true;
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
getPointerTy(), OpFlag);
+ LoadSymAddr = true;
+ }
+
+ // Create nodes that load address of callee and copy it to T9
+ if (IsPIC) {
+ if (LoadSymAddr) {
+ // Load callee address
+ SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ // Use GOT+LO if callee has internal linkage.
+ if (CalleeLo.getNode()) {
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo);
+ Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
+ } else
+ Callee = LoadValue;
+
+ // Use chain output from LoadValue
+ Chain = LoadValue.getValue(1);
+ }
+
+ // copy to T9
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0));
+ InFlag = Chain.getValue(1);
+ Callee = DAG.getRegister(Mips::T9, MVT::i32);
+ }
// MipsJmpLink = #chain, #target_address, #opt_in_flags...
// = Chain, Callee, Reg#1, Reg#2, ...
@@ -1143,7 +1282,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create a stack location to hold GP when PIC is used. This stack
// location is used on function prologue to save GP and also after all
- // emited CALL's to restore GP.
+ // emitted CALL's to restore GP.
if (IsPIC) {
// Function can have an arbitrary number of calls, so
// hold the LastArgStackLoc with the biggest offset.
@@ -1218,18 +1357,18 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
/// and generate load operations for arguments placed on the stack.
SDValue
MipsTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
MipsFI->setVarArgsFrameIndex(0);
// Used with varargs to accumulate store chains.
@@ -1249,9 +1388,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
- SDValue StackPtr;
unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ?
0 : 16); + unsigned LastStackArgEndOffset = 0; + EVT LastRegArgValVT; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1260,6 +1399,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); ArgRegEnd = VA.getLocReg(); + LastRegArgValVT = VA.getValVT(); TargetRegisterClass *RC = 0; if (RegVT == MVT::i32) @@ -1300,8 +1440,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg()+1, RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); - SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, ArgValue2, ArgValue); - ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Pair); + if (!Subtarget->isLittle()) + std::swap(ArgValue, ArgValue2); + ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, + ArgValue, ArgValue2); } } @@ -1321,10 +1463,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // used instead of a direct negative address (which is recorded to // be used on emitPrologue) to avoid mis-calc of the first stack // offset on PEI::calculateFrameObjectOffsets. - // Arguments are always 32-bit. - unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + unsigned ArgSize = VA.getValVT().getSizeInBits()/8; + LastStackArgEndOffset = FirstStackArgLoc + VA.getLocMemOffset() + ArgSize; int FI = MFI->CreateFixedObject(ArgSize, 0, true); - MipsFI->recordLoadArgsFI(FI, -(ArgSize+ + MipsFI->recordLoadArgsFI(FI, -(4 + (FirstStackArgLoc + VA.getLocMemOffset()))); // Create load nodes to retrieve arguments from the stack @@ -1351,29 +1493,52 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // To meet ABI, when VARARGS are passed on registers, the registers // must have their values written to the caller stack frame. If the last // argument was placed in the stack, there's no need to save any register. - if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) { - if (StackPtr.getNode() == 0) - StackPtr = DAG.getRegister(StackReg, getPointerTy()); - - // The last register argument that must be saved is Mips::A3 - TargetRegisterClass *RC = Mips::CPURegsRegisterClass; - unsigned StackLoc = ArgLocs.size()-1; - - for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd, ++StackLoc) { - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); - - int FI = MFI->CreateFixedObject(4, 0, true); - MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, - MachinePointerInfo(), - false, false, 0)); - - // Record the frame index of the first variable argument - // which is a value necessary to VASTART. - if (!MipsFI->getVarArgsFrameIndex()) + if (isVarArg && Subtarget->isABI_O32()) { + if (ArgRegEnd) { + // Last named formal argument is passed in register. + + // The last register argument that must be saved is Mips::A3 + TargetRegisterClass *RC = Mips::CPURegsRegisterClass; + if (LastRegArgValVT == MVT::f64) + ArgRegEnd++; + + if (ArgRegEnd < Mips::A3) { + // Both the last named formal argument and the first variable + // argument are passed in registers. 
+ for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd) { + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); + + int FI = MFI->CreateFixedObject(4, 0, true); + MipsFI->recordStoreVarArgsFI(FI, -(4+(ArgRegEnd-Mips::A0)*4)); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, + MachinePointerInfo(), + false, false, 0)); + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + if (!MipsFI->getVarArgsFrameIndex()) { + MFI->setObjectAlignment(FI, 4); + MipsFI->setVarArgsFrameIndex(FI); + } + } + } else { + // Last named formal argument is in register Mips::A3, and the first + // variable argument is on stack. Record the frame index of the first + // variable argument. + int FI = MFI->CreateFixedObject(4, 0, true); + MFI->setObjectAlignment(FI, 4); + MipsFI->recordStoreVarArgsFI(FI, -20); MipsFI->setVarArgsFrameIndex(FI); + } + } else { + // Last named formal argument and all the variable arguments are passed + // on stack. Record the frame index of the first variable argument. + int FI = MFI->CreateFixedObject(4, 0, true); + MFI->setObjectAlignment(FI, 4); + MipsFI->recordStoreVarArgsFI(FI, -(4+LastStackArgEndOffset)); + MipsFI->setVarArgsFrameIndex(FI); } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 9d6b9f3..e4d0c3d 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -31,45 +31,50 @@ namespace llvm { // Get the Higher 16 bits from a 32-bit immediate // No relation with Mips Hi register - Hi, + Hi, // Get the Lower 16 bits from a 32-bit immediate // No relation with Mips Lo register - Lo, + Lo, // Handle gp_rel (small data/bss sections) relocation. GPRel, - // Select CC Pseudo Instruction - SelectCC, - - // Floating Point Select CC Pseudo Instruction - FPSelectCC, - // Floating Point Branch Conditional FPBrcond, // Floating Point Compare FPCmp, + // Floating Point Conditional Moves + CMovFP_T, + CMovFP_F, + // Floating Point Rounding FPRound, - // Return + // Return Ret, // MAdd/Sub nodes MAdd, MAddu, MSub, - MSubu + MSubu, + + // DivRem(u) + DivRem, + DivRemU, + + BuildPairF64, + ExtractElementF64 }; } //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// - + class MipsTargetLowering : public TargetLowering { public: explicit MipsTargetLowering(MipsTargetMachine &TM); @@ -77,7 +82,7 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - /// getTargetNodeName - This method returns the name of a target specific + /// getTargetNodeName - This method returns the name of a target specific // DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -87,7 +92,7 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: // Subtarget Info const MipsSubtarget *Subtarget; @@ -101,16 +106,15 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; // Lower Operand specifics - SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; virtual SDValue @@ -149,7 +153,7 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::pair<unsigned, const TargetRegisterClass*> + std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 977e0df..a86c5c7 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -24,19 +24,28 @@ //===----------------------------------------------------------------------===// // Floating Point Compare and Branch -def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>, - SDTCisVT<1, OtherVT>]>; -def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, - SDTCisSameAs<1, 2>, SDTCisFP<1>, - SDTCisInt<3>]>; -def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>, - SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; - +def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>, + SDTCisVT<1, OtherVT>]>; +def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, + SDTCisInt<2>]>; +def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>]>; +def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, + SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>]>; +def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, f64>, + SDTCisVT<0, i32>]>; + +def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>; +def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>; +def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>; def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>; -def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, - [SDNPHasChain]>; -def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>; -def MipsFPSelectCC : SDNode<"MipsISD::FPSelectCC", SDT_MipsFPSelectCC>; +def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, + [SDNPHasChain, SDNPOptInGlue]>; +def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>; +def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64", + SDT_MipsExtractElementF64>; // Operand for printing out a condition code. 
let PrintMethod = "printFCCOperand" in @@ -54,7 +63,7 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; //===----------------------------------------------------------------------===// // Instruction Class Templates // -// A set of multiclasses is used to address the register usage. +// A set of multiclasses is used to address the register usage. // // S32 - single precision in 16 32bit even fp registers // single precision in 32 32bit fp registers in SingleOnly mode @@ -65,7 +74,7 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; // Only S32 and D32 are supported right now. //===----------------------------------------------------------------------===// -multiclass FFR1_1<bits<6> funct, string asmstr> +multiclass FFR1_1<bits<6> funct, string asmstr> { def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), !strconcat(asmstr, ".s $fd, $fs"), []>; @@ -74,31 +83,31 @@ multiclass FFR1_1<bits<6> funct, string asmstr> !strconcat(asmstr, ".d $fd, $fs"), []>, Requires<[In32BitMode]>; } -multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp> +multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp> { def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), - !strconcat(asmstr, ".s $fd, $fs"), + !strconcat(asmstr, ".s $fd, $fs"), [(set FGR32:$fd, (FOp FGR32:$fs))]>; def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), - !strconcat(asmstr, ".d $fd, $fs"), + !strconcat(asmstr, ".d $fd, $fs"), [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>; } -class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc, - RegisterClass RcDst, string asmstr>: - FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs), - !strconcat(asmstr, " $fd, $fs"), []>; +class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc, + RegisterClass RcDst, string asmstr>: + FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs), + !strconcat(asmstr, " $fd, $fs"), []>; multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp> { - def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), - (ins FGR32:$fs, FGR32:$ft), + def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), + (ins FGR32:$fs, FGR32:$ft), !strconcat(asmstr, ".s $fd, $fs, $ft"), [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>; - def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), - (ins AFGR64:$fs, AFGR64:$ft), + def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), + (ins AFGR64:$fs, AFGR64:$ft), !strconcat(asmstr, ".d $fd, $fs, $ft"), [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>, Requires<[In32BitMode]>; @@ -115,8 +124,8 @@ let ft = 0 in { defm TRUNC_W : FFR1_1<0b001101, "trunc.w">; defm CVTW : FFR1_1<0b100100, "cvt.w">; - defm FABS : FFR1_2<0b000101, "abs", fabs>; - defm FNEG : FFR1_2<0b000111, "neg", fneg>; + defm FABS : FFR1_2<0b000101, "abs", fabs>; + defm FNEG : FFR1_2<0b000111, "neg", fneg>; defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>; /// Convert to Single Precison @@ -140,23 +149,23 @@ let ft = 0 in { def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">; /// Convert to long signed integer - def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">; - def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">; - - /// Convert to Double Precison - def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">; - def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">; - def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">; - + def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">; + def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">; + + /// Convert to 
Double Precison + def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">; + def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">; + def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">; + /// Convert to Single Precison def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">; - def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">; + def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">; } } // The odd-numbered registers are only referenced when doing loads, // stores, and moves between floating-point and integer registers. -// When defining instructions, we reference all 32-bit registers, +// When defining instructions, we reference all 32-bit registers, // regardless of register aliasing. let fd = 0 in { /// Move Control Registers From/To CPU Registers @@ -165,7 +174,7 @@ let fd = 0 in { def CTC1 : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs), "ctc1 $fs, $rt", []>; - + def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs), "mfc1 $rt, $fs", []>; @@ -180,18 +189,18 @@ def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), /// Floating Point Memory Instructions let Predicates = [IsNotSingleFloat, IsNotMipsI] in { - def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), + def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), "ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>; - def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr), + def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr), "sdc1 $ft, $addr", [(store AFGR64:$ft, addr:$addr)]>; } -// LWC1 and SWC1 can always be emited with odd registers. +// LWC1 and SWC1 can always be emitted with odd registers. def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1 $ft, $addr", - [(set FGR32:$ft, (load addr:$addr))]>; + [(set FGR32:$ft, (load addr:$addr))]>; def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), "swc1 $ft, $addr", - [(store FGR32:$ft, addr:$addr)]>; + [(store FGR32:$ft, addr:$addr)]>; /// Floating-point Aritmetic defm FADD : FFR1_4<0x10, "add", fadd>; @@ -202,7 +211,7 @@ defm FSUB : FFR1_4<0x01, "sub", fsub>; //===----------------------------------------------------------------------===// // Floating Point Branch Codes //===----------------------------------------------------------------------===// -// Mips branch codes. These correspond to condcode in MipsInstrInfo.h. +// Mips branch codes. These correspond to condcode in MipsInstrInfo.h. // They must be kept in synch. 
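// Editor's note: the four immediates defined below select among bc1f/bc1t
// and their "likely" forms; as the comment above says, they must mirror the
// branch condition codes on the C++ side. A hypothetical mirror enum,
// illustrative only and not part of the patch:
//   enum MipsFPBranchCode {
//     MIPS_BRANCH_F  = 0,  // bc1f  - branch if $fcc0 is false
//     MIPS_BRANCH_T  = 1,  // bc1t  - branch if $fcc0 is true
//     MIPS_BRANCH_FL = 2,  // bc1fl - likely variant of bc1f
//     MIPS_BRANCH_TL = 3   // bc1tl - likely variant of bc1t
//   };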
def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; @@ -210,11 +219,11 @@ def MIPS_BRANCH_FL : PatLeaf<(i32 2)>; def MIPS_BRANCH_TL : PatLeaf<(i32 3)>; /// Floating Point Branch of False/True (Likely) -let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in { - class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs), +let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in + class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs), (ins brtarget:$dst), !strconcat(asmstr, " $dst"), - [(MipsFPBrcond op, bb:$dst, FCR31)]>; -} + [(MipsFPBrcond op, bb:$dst)]>; + def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">; def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">; def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">; @@ -223,11 +232,11 @@ def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">; //===----------------------------------------------------------------------===// // Floating Point Flag Conditions //===----------------------------------------------------------------------===// -// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h. +// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h. // They must be kept in synch. def MIPS_FCOND_F : PatLeaf<(i32 0)>; def MIPS_FCOND_UN : PatLeaf<(i32 1)>; -def MIPS_FCOND_EQ : PatLeaf<(i32 2)>; +def MIPS_FCOND_OEQ : PatLeaf<(i32 2)>; def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>; def MIPS_FCOND_OLT : PatLeaf<(i32 4)>; def MIPS_FCOND_ULT : PatLeaf<(i32 5)>; @@ -245,44 +254,90 @@ def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; /// Floating Point Compare let hasDelaySlot = 1, Defs=[FCR31] in { def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc), - "c.$cc.s $fs, $ft", - [(set FCR31, (MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc))]>; - + "c.$cc.s $fs, $ft", + [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>; + def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc), - "c.$cc.d $fs, $ft", - [(set FCR31, (MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc))]>, - Requires<[In32BitMode]>; + "c.$cc.d $fs, $ft", + [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>, + Requires<[In32BitMode]>; } -//===----------------------------------------------------------------------===// -// Floating Point Pseudo-Instructions -//===----------------------------------------------------------------------===// -// For some explanation, see Select_CC at MipsInstrInfo.td. We also embedd a -// condiciton code to enable easy handling by the Custom Inserter. -let usesCustomInserter = 1, Uses=[FCR31] in { - class PseudoFPSelCC<RegisterClass RC, string asmstr> : - MipsPseudo<(outs RC:$dst), - (ins CPURegs:$CmpRes, RC:$T, RC:$F, condcode:$cc), asmstr, - [(set RC:$dst, (MipsFPSelectCC CPURegs:$CmpRes, RC:$T, RC:$F, - imm:$cc))]>; +// Conditional moves: +// These instructions are expanded in +// MipsISelLowering::EmitInstrWithCustomInserter if target does not have +// conditional move instructions. 
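// Editor's sketch of the movz/movn semantics that the conditional-move
// classes below wrap (hardware behaviour, not emitted code):
//   movz rd, rs, rt  =>  if (rt == 0) rd = rs;   // integer flag
//   movn rd, rs, rt  =>  if (rt != 0) rd = rs;
// movz.s/movz.d and movn.s/movn.d apply the same integer test while moving
// single/double FP data, which is the "flag:int, data:float" case below.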
+// flag:int, data:float +let usesCustomInserter = 1, Constraints = "$F = $dst" in +class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func, + string instr_asm> : + FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F), + !strconcat(instr_asm, "\t$dst, $T, $cond"), []>; + +def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">; +def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">; + +let Predicates = [In32BitMode] in { + def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">; + def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">; } -// The values to be selected are fp but the condition test is with integers. -def Select_CC_S32 : PseudoSelCC<FGR32, "# MipsSelect_CC_S32_f32">; -def Select_CC_D32 : PseudoSelCC<AFGR64, "# MipsSelect_CC_D32_f32">, - Requires<[In32BitMode]>; +defm : MovzPats<FGR32, MOVZ_S>; +defm : MovnPats<FGR32, MOVN_S>; + +let Predicates = [In32BitMode] in { + defm : MovzPats<AFGR64, MOVZ_D>; + defm : MovnPats<AFGR64, MOVN_D>; +} -// The values to be selected are int but the condition test is done with fp. -def Select_FCC : PseudoFPSelCC<CPURegs, "# MipsSelect_FCC">; +let usesCustomInserter = 1, Uses = [FCR31], Constraints = "$F = $dst" in { +// flag:float, data:int +class CondMovFPInt<SDNode cmov, bits<1> tf, string instr_asm> : + FCMOV<tf, (outs CPURegs:$dst), (ins CPURegs:$T, CPURegs:$F), + !strconcat(instr_asm, "\t$dst, $T, $$fcc0"), + [(set CPURegs:$dst, (cmov CPURegs:$T, CPURegs:$F))]>; + +// flag:float, data:float +class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf, + string instr_asm> : + FFCMOV<fmt, tf, (outs RC:$dst), (ins RC:$T, RC:$F), + !strconcat(instr_asm, "\t$dst, $T, $$fcc0"), + [(set RC:$dst, (cmov RC:$T, RC:$F))]>; +} -// The values to be selected and the condition test is done with fp. -def Select_FCC_S32 : PseudoFPSelCC<FGR32, "# MipsSelect_FCC_S32_f32">; -def Select_FCC_D32 : PseudoFPSelCC<AFGR64, "# MipsSelect_FCC_D32_f32">, - Requires<[In32BitMode]>; +def MOVT : CondMovFPInt<MipsCMovFP_T, 1, "movt">; +def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">; +def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">; +def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">; -def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), - "# MOVCCRToCCR", []>; +let Predicates = [In32BitMode] in { + def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">; + def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">; +} + +//===----------------------------------------------------------------------===// +// Floating Point Pseudo-Instructions +//===----------------------------------------------------------------------===// +def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), + "# MOVCCRToCCR", []>; + +// This pseudo instr gets expanded into 2 mtc1 instrs after register +// allocation. +def BuildPairF64 : + MipsPseudo<(outs AFGR64:$dst), + (ins CPURegs:$lo, CPURegs:$hi), "", + [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; + +// This pseudo instr gets expanded into 2 mfc1 instrs after register +// allocation. +// if n is 0, lower part of src is extracted. +// if n is 1, higher part of src is extracted. 
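// Editor's note (illustrative mapping, not the committed expansion pass):
// with $src allocated to D1, which aliases the F2/F3 pair, the pseudo
// defined below would expand along the lines of
//   $dst = ExtractElementF64 $src, 0   ->   mfc1 $dst, $f2   // low word
//   $dst = ExtractElementF64 $src, 1   ->   mfc1 $dst, $f3   // high word
// BuildPairF64 is the symmetric case, using mtc1 to write both halves.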
+def ExtractElementF64 : + MipsPseudo<(outs CPURegs:$dst), + (ins AFGR64:$src, i32imm:$n), "", + [(set CPURegs:$dst, + (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; //===----------------------------------------------------------------------===// // Floating Point Patterns @@ -306,7 +361,7 @@ def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>; def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>; def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>; -let Predicates = [In32BitMode] in { +let Predicates = [In32BitMode] in { def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>; def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>; } diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 98ae2fa..9dfcdfb 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -22,8 +22,8 @@ //===----------------------------------------------------------------------===// // Generic Mips Format -class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: Instruction +class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin>: Instruction { field bits<32> Inst; @@ -32,8 +32,8 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, bits<6> opcode; // Top 5 bits are the 'opcode' field - let Inst{31-26} = opcode; - + let Inst{31-26} = opcode; + dag OutOperandList = outs; dag InOperandList = ins; @@ -52,7 +52,7 @@ class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst<outs, ins, asmstr, pattern, itin> + MipsInst<outs, ins, asmstr, pattern, itin> { bits<5> rd; bits<5> rs; @@ -64,7 +64,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, let funct = _funct; let Inst{25-21} = rs; - let Inst{20-16} = rt; + let Inst{20-16} = rt; let Inst{15-11} = rd; let Inst{10-6} = shamt; let Inst{5-0} = funct; @@ -75,7 +75,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin> + InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin> { bits<5> rt; bits<5> rs; @@ -84,7 +84,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, let opcode = op; let Inst{25-21} = rs; - let Inst{20-16} = rt; + let Inst{20-16} = rt; let Inst{15-0} = imm16; } @@ -93,12 +93,12 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, //===----------------------------------------------------------------------===// class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin> + InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin> { bits<26> addr; let opcode = op; - + let Inst{25-0} = addr; } @@ -119,9 +119,9 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, // Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|> //===----------------------------------------------------------------------===// -class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, - string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary> +class FFR<bits<6> op, 
bits<6> _funct, bits<5> _fmt, dag outs, dag ins, + string asmstr, list<dag> pattern> : + MipsInst<outs, ins, asmstr, pattern, NoItinerary> { bits<5> fd; bits<5> fs; @@ -134,7 +134,7 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, let fmt = _fmt; let Inst{25-21} = fmt; - let Inst{20-16} = ft; + let Inst{20-16} = ft; let Inst{15-11} = fs; let Inst{10-6} = fd; let Inst{5-0} = funct; @@ -144,8 +144,8 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, // Format FI instruction class in Mips : <|opcode|base|ft|immediate|> //===----------------------------------------------------------------------===// -class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, NoItinerary> +class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: + MipsInst<outs, ins, asmstr, pattern, NoItinerary> { bits<5> ft; bits<5> base; @@ -154,7 +154,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: let opcode = op; let Inst{25-21} = base; - let Inst{20-16} = ft; + let Inst{20-16} = ft; let Inst{15-0} = imm16; } @@ -162,8 +162,8 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: // Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|> //===----------------------------------------------------------------------===// -class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary> +class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : + MipsInst<outs, ins, asmstr, pattern, NoItinerary> { bits<5> fs; bits<5> ft; @@ -174,9 +174,54 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : let fmt = _fmt; let Inst{25-21} = fmt; - let Inst{20-16} = ft; + let Inst{20-16} = ft; let Inst{15-11} = fs; let Inst{10-6} = 0; let Inst{5-4} = 0b11; let Inst{3-0} = cc; } + + +class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr, + list<dag> pattern> : + MipsInst<outs, ins, asmstr, pattern, NoItinerary> +{ + bits<5> rd; + bits<5> rs; + bits<3> N; + bits<1> tf; + + let opcode = 0; + let tf = _tf; + + let Inst{25-21} = rs; + let Inst{20-18} = N; + let Inst{17} = 0; + let Inst{16} = tf; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = 1; +} + +class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, + list<dag> pattern> : + MipsInst<outs, ins, asmstr, pattern, NoItinerary> +{ + bits<5> fd; + bits<5> fs; + bits<3> N; + bits<5> fmt; + bits<1> tf; + + let opcode = 17; + let fmt = _fmt; + let tf = _tf; + + let Inst{25-21} = fmt; + let Inst{20-18} = N; + let Inst{17} = 0; + let Inst{16} = tf; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-0} = 17; +}
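// Editor's worked encoding example for the FCMOV format above (sketch):
// "movt $4, $5, $fcc0" has rs=$5(=00101), N=000 (fcc0), tf=1, rd=$4(=00100):
//   000000 00101 000 0 1 00100 00000 000001
// Clearing tf selects movf. FFCMOV uses the same layout with opcode 010001
// and fmt choosing between .s (16) and .d (17), per the CondMovFPFP defs.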
\ No newline at end of file diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index aaf307b..be044fa 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -36,7 +36,7 @@ static bool isZeroImm(const MachineOperand &op) { /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. unsigned MipsInstrInfo:: -isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) || (MI->getOpcode() == Mips::LDC1)) { @@ -57,7 +57,7 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. unsigned MipsInstrInfo:: -isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) || (MI->getOpcode() == Mips::SDC1)) { @@ -74,7 +74,7 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const /// insertNoop - If data hazard condition is found insert the target nop /// instruction. void MipsInstrInfo:: -insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const +insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; BuildMI(MBB, MI, DL, get(Mips::NOP)); @@ -136,7 +136,7 @@ copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); return; } - + if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -153,13 +153,13 @@ copyPhysReg(MachineBasicBlock &MBB, void MipsInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, + unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == Mips::CPURegsRegisterClass) + if (RC == Mips::CPURegsRegisterClass) BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill)) .addImm(0).addFrameIndex(FI); else if (RC == Mips::FGR32RegisterClass) @@ -171,7 +171,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addReg(SrcReg, getKillRegState(isKill)) .addImm(0).addFrameIndex(FI); } else { - const TargetRegisterInfo *TRI = + const TargetRegisterInfo *TRI = MBB.getParent()->getTarget().getRegisterInfo(); const unsigned *SubSet = TRI->getSubRegisters(SrcReg); BuildMI(MBB, I, DL, get(Mips::SWC1)) @@ -189,12 +189,12 @@ void MipsInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == Mips::CPURegsRegisterClass) + if (RC == Mips::CPURegsRegisterClass) BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI); else if (RC == Mips::FGR32RegisterClass) BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI); @@ -202,7 +202,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (!TM.getSubtarget<MipsSubtarget>().isMips1()) { BuildMI(MBB, I, DL, 
get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI); } else { - const TargetRegisterInfo *TRI = + const TargetRegisterInfo *TRI = MBB.getParent()->getTarget().getRegisterInfo(); const unsigned *SubSet = TRI->getSubRegisters(DestReg); BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0]) @@ -218,281 +218,202 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // Branch Analysis //===----------------------------------------------------------------------===// -/// GetCondFromBranchOpc - Return the Mips CC that matches -/// the correspondent Branch instruction opcode. -static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc) -{ - switch (BrOpc) { - default: return Mips::COND_INVALID; - case Mips::BEQ : return Mips::COND_E; - case Mips::BNE : return Mips::COND_NE; - case Mips::BGTZ : return Mips::COND_GZ; - case Mips::BGEZ : return Mips::COND_GEZ; - case Mips::BLTZ : return Mips::COND_LZ; - case Mips::BLEZ : return Mips::COND_LEZ; - - // We dont do fp branch analysis yet! - case Mips::BC1T : - case Mips::BC1F : return Mips::COND_INVALID; - } +static unsigned GetAnalyzableBrOpc(unsigned Opc) { + return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || + Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0; } -/// GetCondBranchFromCond - Return the Branch instruction -/// opcode that matches the cc. -unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC) +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. +unsigned Mips::GetOppositeBranchOpc(unsigned Opc) { - switch (CC) { - default: llvm_unreachable("Illegal condition code!"); - case Mips::COND_E : return Mips::BEQ; - case Mips::COND_NE : return Mips::BNE; - case Mips::COND_GZ : return Mips::BGTZ; - case Mips::COND_GEZ : return Mips::BGEZ; - case Mips::COND_LZ : return Mips::BLTZ; - case Mips::COND_LEZ : return Mips::BLEZ; - - case Mips::FCOND_F: - case Mips::FCOND_UN: - case Mips::FCOND_EQ: - case Mips::FCOND_UEQ: - case Mips::FCOND_OLT: - case Mips::FCOND_ULT: - case Mips::FCOND_OLE: - case Mips::FCOND_ULE: - case Mips::FCOND_SF: - case Mips::FCOND_NGLE: - case Mips::FCOND_SEQ: - case Mips::FCOND_NGL: - case Mips::FCOND_LT: - case Mips::FCOND_NGE: - case Mips::FCOND_LE: - case Mips::FCOND_NGT: return Mips::BC1T; - - case Mips::FCOND_T: - case Mips::FCOND_OR: - case Mips::FCOND_NEQ: - case Mips::FCOND_OGL: - case Mips::FCOND_UGE: - case Mips::FCOND_OGE: - case Mips::FCOND_UGT: - case Mips::FCOND_OGT: - case Mips::FCOND_ST: - case Mips::FCOND_GLE: - case Mips::FCOND_SNE: - case Mips::FCOND_GL: - case Mips::FCOND_NLT: - case Mips::FCOND_GE: - case Mips::FCOND_NLE: - case Mips::FCOND_GT: return Mips::BC1F; + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case Mips::BEQ : return Mips::BNE; + case Mips::BNE : return Mips::BEQ; + case Mips::BGTZ : return Mips::BLEZ; + case Mips::BGEZ : return Mips::BLTZ; + case Mips::BLTZ : return Mips::BGEZ; + case Mips::BLEZ : return Mips::BGTZ; + case Mips::BC1T : return Mips::BC1F; + case Mips::BC1F : return Mips::BC1T; } } -/// GetOppositeBranchCondition - Return the inverse of the specified -/// condition, e.g. turning COND_E to COND_NE. 
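// Editor's note on the refactor below: the Mips::CondCode indirection is
// dropped; branch analysis now stores the branch opcode itself, so the old
// opcode<->CondCode mapping functions become dead. The new scheme asks a
// single question of any terminator (sketch):
//   if (unsigned Opc = GetAnalyzableBrOpc(MI->getOpcode()))
//     ...               // BEQ/BNE/BGTZ/BGEZ/BLTZ/BLEZ/BC1T/BC1F/J
//   else
//     return true;      // indirect or unknown branch: analysis gives up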
-Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC) -{ - switch (CC) { - default: llvm_unreachable("Illegal condition code!"); - case Mips::COND_E : return Mips::COND_NE; - case Mips::COND_NE : return Mips::COND_E; - case Mips::COND_GZ : return Mips::COND_LEZ; - case Mips::COND_GEZ : return Mips::COND_LZ; - case Mips::COND_LZ : return Mips::COND_GEZ; - case Mips::COND_LEZ : return Mips::COND_GZ; - case Mips::FCOND_F : return Mips::FCOND_T; - case Mips::FCOND_UN : return Mips::FCOND_OR; - case Mips::FCOND_EQ : return Mips::FCOND_NEQ; - case Mips::FCOND_UEQ: return Mips::FCOND_OGL; - case Mips::FCOND_OLT: return Mips::FCOND_UGE; - case Mips::FCOND_ULT: return Mips::FCOND_OGE; - case Mips::FCOND_OLE: return Mips::FCOND_UGT; - case Mips::FCOND_ULE: return Mips::FCOND_OGT; - case Mips::FCOND_SF: return Mips::FCOND_ST; - case Mips::FCOND_NGLE:return Mips::FCOND_GLE; - case Mips::FCOND_SEQ: return Mips::FCOND_SNE; - case Mips::FCOND_NGL: return Mips::FCOND_GL; - case Mips::FCOND_LT: return Mips::FCOND_NLT; - case Mips::FCOND_NGE: return Mips::FCOND_GE; - case Mips::FCOND_LE: return Mips::FCOND_NLE; - case Mips::FCOND_NGT: return Mips::FCOND_GT; - } +static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl<MachineOperand>& Cond) { + assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); + int NumOp = Inst->getNumExplicitOperands(); + + // for both int and fp branches, the last explicit operand is the + // MBB. + BB = Inst->getOperand(NumOp-1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(Opc)); + + for (int i=0; i<NumOp-1; i++) + Cond.push_back(Inst->getOperand(i)); } -bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const + bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); + + // Skip all the debug instructions. + while (I != REnd && I->isDebugValue()) + ++I; + + if (I == REnd || !isUnpredicatedTerminator(&*I)) { + // If this block ends with no branches (it just falls through to its succ) + // just return false, leaving TBB/FBB null. + TBB = FBB = NULL; return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. + + MachineInstr *LastInst = &*I; unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (!LastInst->getDesc().isBranch()) + + // Not an analyzable branch (must be an indirect jump). + if (!GetAnalyzableBrOpc(LastOpc)) + return true; + + // Get the second to last instruction in the block. + unsigned SecondLastOpc = 0; + MachineInstr *SecondLastInst = NULL; + + if (++I != REnd) { + SecondLastInst = &*I; + SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode()); + + // Not an analyzable branch (must be an indirect jump). + if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) return true; + } + // If there is only one terminator instruction, process it. 
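// Editor's note (sketch): after AnalyzeCondBr, Cond holds the opcode
// followed by the branch's register operands; for "bne $2, $3, LBB0_2"
// that is Cond[0]=imm(Mips::BNE), Cond[1]=reg($2), Cond[2]=reg($3).
// BuildCondBr and ReverseBranchCondition below rely on exactly this layout.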
+ if (!SecondLastOpc) { // Unconditional branch if (LastOpc == Mips::J) { TBB = LastInst->getOperand(0).getMBB(); return false; } - Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode()); - if (BranchCode == Mips::COND_INVALID) - return true; // Can't handle indirect branch. - // Conditional branch - // Block ends with fall-through condbranch. - if (LastOpc != Mips::COND_INVALID) { - int LastNumOp = LastInst->getNumOperands(); - - TBB = LastInst->getOperand(LastNumOp-1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(BranchCode)); - - for (int i=0; i<LastNumOp-1; i++) { - Cond.push_back(LastInst->getOperand(i)); - } - - return false; - } + AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); + return false; } - - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - + + // If we reached here, there are two branches. // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + if (++I != REnd && isUnpredicatedTerminator(&*I)) return true; - // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it. - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc); + // If second to last instruction is an unconditional branch, + // analyze it and remove the last instruction. + if (SecondLastOpc == Mips::J) { + // Return if the last instruction cannot be removed. + if (!AllowModify) + return true; - if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) { - int SecondNumOp = SecondLastInst->getNumOperands(); + TBB = SecondLastInst->getOperand(0).getMBB(); + LastInst->eraseFromParent(); + return false; + } - TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(BranchCode)); + // Conditional branch followed by an unconditional branch. + // The last one must be unconditional. + if (LastOpc != Mips::J) + return true; - for (int i=0; i<SecondNumOp-1; i++) { - Cond.push_back(SecondLastInst->getOperand(i)); - } + AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } + return false; +} - // If the block ends with two unconditional branches, handle it. The last - // one is not executed, so remove it. - if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } +void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, DebugLoc DL, + const SmallVectorImpl<MachineOperand>& Cond) + const { + unsigned Opc = Cond[0].getImm(); + const TargetInstrDesc &TID = get(Opc); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, TID); - // Otherwise, can't handle this. - return true; + for (unsigned i = 1; i < Cond.size(); ++i) + MIB.addReg(Cond[i].getReg()); + + MIB.addMBB(TBB); } unsigned MipsInstrInfo:: -InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, +InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) && - "Mips branch conditions can have two|three components!"); - if (FBB == 0) { // One way branch. 
- if (Cond.empty()) { - // Unconditional branch? - BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB); - } else { - // Conditional branch. - unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm()); - const TargetInstrDesc &TID = get(Opc); - - if (TID.getNumOperands() == 3) - BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()) - .addReg(Cond[2].getReg()) - .addMBB(TBB); - else - BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()) - .addMBB(TBB); - - } - return 1; - } - - // Two-way Conditional branch. - unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm()); - const TargetInstrDesc &TID = get(Opc); + // # of condition operands: + // Unconditional branches: 0 + // Floating point branches: 1 (opc) + // Int BranchZero: 2 (opc, reg) + // Int Branch: 3 (opc, reg0, reg1) + assert((Cond.size() <= 3) && + "# of Mips branch conditions must be <= 3!"); - if (TID.getNumOperands() == 3) - BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg()) - .addMBB(TBB); - else - BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB); + // Two-way Conditional branch. + if (FBB) { + BuildCondBr(MBB, TBB, DL, Cond); + BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB); + return 2; + } - BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB); - return 2; + // One way branch. + // Unconditional branch. + if (Cond.empty()) + BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB); + else // Conditional branch. + BuildCondBr(MBB, TBB, DL, Cond); + return 1; } unsigned MipsInstrInfo:: -RemoveBranch(MachineBasicBlock &MBB) const +RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (I->getOpcode() != Mips::J && - GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) return 1; - --I; - if (GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID) - return 1; - - // Remove the branch. - I->eraseFromParent(); - return 2; + MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); + MachineBasicBlock::reverse_iterator FirstBr; + unsigned removed; + + // Skip all the debug instructions. + while (I != REnd && I->isDebugValue()) + ++I; + + FirstBr = I; + + // Up to 2 branches are removed. + // Note that indirect branches are not removed. + for(removed = 0; I != REnd && removed < 2; ++I, ++removed) + if (!GetAnalyzableBrOpc(I->getOpcode())) + break; + + MBB.erase(I.base(), FirstBr.base()); + + return removed; } -/// ReverseBranchCondition - Return the inverse opcode of the +/// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. 
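/// (Editor's sketch of the contract: only Cond[0] changes, e.g.
///   {imm(Mips::BNE), reg0, reg1}  ->  {imm(Mips::BEQ), reg0, reg1}
/// which is why the rewritten body below is a single setImm call.)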
bool MipsInstrInfo:: -ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { - assert( (Cond.size() == 3 || Cond.size() == 2) && + assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(GetOppositeBranchCondition((Mips::CondCode)Cond[0].getImm())); + Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm())); return false; } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 52a3d39..5fdbf1f 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -37,7 +37,7 @@ namespace Mips { // To be used with float branch True FCOND_F, FCOND_UN, - FCOND_EQ, + FCOND_OEQ, FCOND_UEQ, FCOND_OLT, FCOND_ULT, @@ -57,8 +57,8 @@ namespace Mips { // above ones, but are used with a branch False; FCOND_T, FCOND_OR, - FCOND_NEQ, - FCOND_OGL, + FCOND_UNE, + FCOND_ONE, FCOND_UGE, FCOND_OGE, FCOND_UGT, @@ -70,27 +70,15 @@ namespace Mips { FCOND_NLT, FCOND_GE, FCOND_NLE, - FCOND_GT, - - // Only integer conditions - COND_E, - COND_GZ, - COND_GEZ, - COND_LZ, - COND_LEZ, - COND_NE, - COND_INVALID + FCOND_GT }; - - // Turn condition code into conditional branch opcode. - unsigned GetCondBranchFromCond(CondCode CC); - /// GetOppositeBranchCondition - Return the inverse of the specified cond, - /// e.g. turning COND_E to COND_NE. - CondCode GetOppositeBranchCondition(Mips::CondCode CC); + /// GetOppositeBranchOpc - Return the inverse of the specified + /// opcode, e.g. turning BEQ to BNE. + unsigned GetOppositeBranchOpc(unsigned Opc); /// MipsCCToString - Map each FP condition code to its string - inline static const char *MipsFCCToString(Mips::CondCode CC) + inline static const char *MipsFCCToString(Mips::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code"); @@ -98,10 +86,10 @@ namespace Mips { case FCOND_T: return "f"; case FCOND_UN: case FCOND_OR: return "un"; - case FCOND_EQ: - case FCOND_NEQ: return "eq"; + case FCOND_OEQ: + case FCOND_UNE: return "eq"; case FCOND_UEQ: - case FCOND_OGL: return "ueq"; + case FCOND_ONE: return "ueq"; case FCOND_OLT: case FCOND_UGE: return "olt"; case FCOND_ULT: @@ -121,11 +109,11 @@ namespace Mips { case FCOND_LT: case FCOND_NLT: return "lt"; case FCOND_NGE: - case FCOND_GE: return "ge"; + case FCOND_GE: return "nge"; case FCOND_LE: - case FCOND_NLE: return "nle"; + case FCOND_NLE: return "le"; case FCOND_NGT: - case FCOND_GT: return "gt"; + case FCOND_GT: return "ngt"; } } } @@ -138,27 +126,27 @@ namespace MipsII { enum TOF { //===------------------------------------------------------------------===// // Mips Specific MachineOperand flags. - + MO_NO_FLAG, /// MO_GOT - Represents the offset into the global offset table at which /// the address the relocation entry symbol resides during execution. MO_GOT, - /// MO_GOT_CALL - Represents the offset into the global offset table at - /// which the address of a call site relocation entry symbol resides + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides /// during execution. This is different from the above since this flag /// can only be present in call instructions. MO_GOT_CALL, - /// MO_GPREL - Represents the offset from the current gp value to be used + /// MO_GPREL - Represents the offset from the current gp value to be used /// for the relocatable object file being produced. 
MO_GPREL, - /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol - /// address. - MO_ABS_HILO - + /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HI, + MO_ABS_LO }; } @@ -181,7 +169,7 @@ public: /// any side effects other than loading from the stack slot. virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; - + /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -189,13 +177,19 @@ public: /// any side effects other than storing to the stack slot. virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - + /// Branch Analysis virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + +private: + void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, + const SmallVectorImpl<MachineOperand>& Cond) const; + +public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, @@ -220,7 +214,7 @@ public: bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; /// Insert nop instruction when hazard condition is found - virtual void insertNoop(MachineBasicBlock &MBB, + virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; /// getGlobalBaseReg - Return a virtual register initialized with the diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index b70266a..19b9c35 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -19,18 +19,19 @@ include "MipsInstrFormats.td" def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; -def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, - SDTCisSameAs<2, 3>, SDTCisInt<1>]>; def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, - SDTCisInt<4>]>; + SDTCisSameAs<1, 2>, + SDTCisSameAs<3, 4>, + SDTCisInt<4>]>; def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_MipsMAddMSub : SDTypeProfile<0, 4, +def SDT_MipsMAddMSub : SDTypeProfile<0, 4, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, + SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>]>; - +def SDT_MipsDivRem : SDTypeProfile<0, 2, + [SDTCisVT<0, i32>, + SDTCisSameAs<0, 1>]>; // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, @@ -54,9 +55,6 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -// Select Condition Code -def MipsSelectCC : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>; - // MAdd*/MSub* nodes def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub, [SDNPOptInGlue, SDNPOutGlue]>; @@ -67,6 +65,12 @@ def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub, def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub, [SDNPOptInGlue, SDNPOutGlue]>; +// DivRem(u) nodes +def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem, + [SDNPOutGlue]>; +def MipsDivRemU : 
SDNode<"MipsISD::DivRemU", SDT_MipsDivRem, + [SDNPOutGlue]>; + //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. //===----------------------------------------------------------------------===// @@ -165,7 +169,7 @@ class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode, let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in class MArithR<bits<6> func, string instr_asm, SDNode op> : FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), - !strconcat(instr_asm, "\t$rs, $rt"), + !strconcat(instr_asm, "\t$rs, $rt"), [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul>; // Logical @@ -185,7 +189,7 @@ class LogicNOR<bits<6> op, bits<6> func, string instr_asm>: [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>; // Shifts -class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm, +class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm, SDNode OpNode>: FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c), !strconcat(instr_asm, "\t$dst, $b, $c"), @@ -193,7 +197,7 @@ class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm, let rs = _rs; } -class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm, +class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm, SDNode OpNode>: FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), !strconcat(instr_asm, "\t$dst, $b, $c"), @@ -283,9 +287,16 @@ let isCall=1, hasDelaySlot=1, } // Mul, Div -class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>: - FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), - !strconcat(instr_asm, "\t$a, $b"), [], itin>; +let Defs = [HI, LO] in { + class Mul<bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), + !strconcat(instr_asm, "\t$a, $b"), [], itin>; + + class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), + !strconcat(instr_asm, "\t$$zero, $a, $b"), + [(op CPURegs:$a, CPURegs:$b)], itin>; +} // Move from Hi/Lo class MoveFromLOHI<bits<6> func, string instr_asm>: @@ -348,6 +359,11 @@ def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>; def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>; def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>; +// These macros are inserted to prevent GAS from complaining +// when using the AT register. +def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>; +def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>; + // When handling PIC code the assembler needs .cpload and .cprestore // directives. If the real instructions corresponding these directives // are used, we have the same behavior, but get also a bunch of warnings @@ -355,18 +371,6 @@ def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>; def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>; -// The supported Mips ISAs dont have any instruction close to the SELECT_CC -// operation. 
The solution is to create a Mips pseudo SELECT_CC instruction -// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally -// replace it for real supported nodes into EmitInstrWithCustomInserter -let usesCustomInserter = 1 in { - class PseudoSelCC<RegisterClass RC, string asmstr>: - MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr, - [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>; -} - -def Select_CC : PseudoSelCC<CPURegs, "# MipsSelect_CC_i32">; - //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -447,12 +451,10 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>; /// Multiply and Divide Instructions. -let Defs = [HI, LO] in { - def MULT : MulDiv<0x18, "mult", IIImul>; - def MULTu : MulDiv<0x19, "multu", IIImul>; - def DIV : MulDiv<0x1a, "div", IIIdiv>; - def DIVu : MulDiv<0x1b, "divu", IIIdiv>; -} +def MULT : Mul<0x18, "mult", IIImul>; +def MULTu : Mul<0x19, "multu", IIImul>; +def SDIV : Div<MipsDivRem, 0x1a, "div", IIIdiv>; +def UDIV : Div<MipsDivRemU, 0x1b, "divu", IIIdiv>; let Defs = [HI] in def MTHI : MoveToLOHI<0x11, "mthi">; @@ -489,10 +491,19 @@ let Predicates = [HasSwap] in { def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>; def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>; -let Predicates = [HasCondMov], Constraints = "$F = $dst" in { - def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>; - def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>; -} +// Conditional moves: +// These instructions are expanded in +// MipsISelLowering::EmitInstrWithCustomInserter if target does not have +// conditional move instructions. +// flag:int, data:int +let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in + class CondMovIntInt<bits<6> funct, string instr_asm> : + FR<0, funct, (outs CPURegs:$dst), + (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F), + !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>; + +def MOVZ_I : CondMovIntInt<0x0a, "movz">; +def MOVN_I : CondMovIntInt<0x0b, "movn">; /// No operation let addr=0 in @@ -533,7 +544,7 @@ def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs), (SUBu CPURegs:$lhs, CPURegs:$rhs)>; def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs), (ADDu CPURegs:$lhs, CPURegs:$rhs)>; -def : Pat<(addc CPURegs:$src, imm:$imm), +def : Pat<(addc CPURegs:$src, immSExt16:$imm), (ADDiu CPURegs:$src, imm:$imm)>; // Call @@ -546,8 +557,11 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), // hi/lo relocs def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; +def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; +def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), + (ADDiu CPURegs:$hi, tblockaddress:$lo)>; def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), @@ -599,33 +613,43 @@ def : Pat<(brcond CPURegs:$cond, bb:$dst), (BNE CPURegs:$cond, ZERO, bb:$dst)>; // select patterns -def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$lhs, CPURegs:$rhs))>; -def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs))>; -def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, 
(SLTi CPURegs:$lhs, immSExt16:$rhs))>; -def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLTiu CPURegs:$lh, immSExt16:$rh))>; - -def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$rhs, CPURegs:$lhs))>; -def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs))>; - -def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>; -def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>; - -def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F), - (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>; +multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> { + def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; + def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; + def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>; + def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>; + def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>; + def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>; + def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; + def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F), + (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>; +} + +multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> { + def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; + def : Pat<(select CPURegs:$cond, RC:$T, RC:$F), + (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>; + def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F), + (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>; +} + +defm : MovzPats<CPURegs, MOVZ_I>; +defm : MovnPats<CPURegs, MOVN_I>; // select patterns with got access -def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), - (i32 tglobaladdr:$T), CPURegs:$F), - (MOVN CPURegs:$F, (ADDiu GP, tglobaladdr:$T), - (XOR CPURegs:$lhs, CPURegs:$rhs))>; +let AddedComplexity = 10 in + def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), + (i32 tglobaladdr:$T), CPURegs:$F), + (MOVN_I CPURegs:$F, (ADDiu GP, tglobaladdr:$T), + (XOR CPURegs:$lhs, CPURegs:$rhs))>; // setcc patterns def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs), diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h index 15a867e..41b7192 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MipsMCAsmInfo.h @@ -19,7 +19,7 @@ namespace llvm { class Target; - + class MipsMCAsmInfo : public MCAsmInfo { public: explicit MipsMCAsmInfo(const Target &T, StringRef TT); diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 3719e58..c09b129 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -38,7 +38,7 @@ using namespace llvm; 
-MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, +MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, const TargetInstrInfo &tii) : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), Subtarget(ST), TII(tii) {} @@ -46,7 +46,7 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, /// getRegisterNumbering - Given the enum value for some register, e.g. /// Mips::RA, return the number that it corresponds to (e.g. 31). unsigned MipsRegisterInfo:: -getRegisterNumbering(unsigned RegEnum) +getRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0; @@ -82,30 +82,30 @@ getRegisterNumbering(unsigned RegEnum) case Mips::FP : case Mips::F30: case Mips::D15: return 30; case Mips::RA : case Mips::F31: return 31; default: llvm_unreachable("Unknown register number!"); - } + } return 0; // Not reached } unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } //===----------------------------------------------------------------------===// -// Callee Saved Registers methods +// Callee Saved Registers methods //===----------------------------------------------------------------------===// /// Mips Callee Saved Registers const unsigned* MipsRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const +getCalleeSavedRegs(const MachineFunction *MF) const { // Mips callee-save register range is $16-$23, $f20-$f30 static const unsigned SingleFloatOnlyCalleeSavedRegs[] = { - Mips::S0, Mips::S1, Mips::S2, Mips::S3, + Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, - Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25, + Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30, 0 }; static const unsigned BitMode32CalleeSavedRegs[] = { - Mips::S0, Mips::S1, Mips::S2, Mips::S3, + Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0 }; @@ -132,11 +132,11 @@ getReservedRegs(const MachineFunction &MF) const { if (!Subtarget.isSingleFloat()) for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2) Reserved.set(FReg); - + return Reserved; } -// This function eliminate ADJCALLSTACKDOWN, +// This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void MipsRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, @@ -157,7 +157,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned i = 0; while (!MI.getOperand(i).isFI()) { ++i; - assert(i < MI.getNumOperands() && + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } @@ -179,8 +179,43 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - MI.getOperand(i-1).ChangeToImmediate(Offset); - MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + unsigned NewReg = 0; + int NewImm = 0; + MachineBasicBlock &MBB = *MI.getParent(); + bool ATUsed; + unsigned OrigReg = getFrameRegister(MF); + int OrigImm = Offset; + +// OrigImm fits in the 16-bit field + if (OrigImm < 0x8000 && OrigImm >= -0x8000) { + NewReg = OrigReg; + NewImm = OrigImm; + ATUsed = false; + } + else { + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + DebugLoc DL = II->getDebugLoc(); + int ImmLo = OrigImm & 0xffff; + int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) + + ((OrigImm & 0x8000) != 0); 
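// Editor's worked example for the split above (values illustrative):
// OrigImm = 0x00018000 gives ImmLo = 0x8000, which the hardware later
// sign-extends to -32768, and ImmHi = 0x0001 + 1 = 0x0002, so the emitted
// sequence is roughly
//   lui  $at, 2              # $at = 0x00020000
//   addu $at, <framereg>, $at
//   lw   ..., -32768($at)    # 0x00020000 - 0x8000 = 0x00018000, as required
// The "+ ((OrigImm & 0x8000) != 0)" term compensates for that sign
// extension of the low half.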
+ + // FIXME: change this when mips goes MC". + BuildMI(MBB, II, DL, TII->get(Mips::NOAT)); + BuildMI(MBB, II, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi); + BuildMI(MBB, II, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg) + .addReg(Mips::AT); + NewReg = Mips::AT; + NewImm = ImmLo; + + ATUsed = true; + } + + // FIXME: change this when mips goes MC". + if (ATUsed) + BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO)); + + MI.getOperand(i).ChangeToRegister(NewReg, false); + MI.getOperand(i-1).ChangeToImmediate(NewImm); } void MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index a7f4bf9..767359f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -26,7 +26,7 @@ class Type; struct MipsRegisterInfo : public MipsGenRegisterInfo { const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; - + MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii); /// getRegisterNumbering - Given the enum value for some register, e.g. diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 60efe31..9f9cae7 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -17,7 +17,7 @@ class MipsReg<string n> : Register<n> { let Namespace = "Mips"; } -class MipsRegWithSubRegs<string n, list<Register> subregs> +class MipsRegWithSubRegs<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> { field bits<5> Num; let Namespace = "Mips"; @@ -83,7 +83,7 @@ let Namespace = "Mips" in { def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>; def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>; def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>; - + /// Mips Single point precision FPU Registers def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>; def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>; @@ -117,7 +117,7 @@ let Namespace = "Mips" in { def F29 : FPR<29, "F29">, DwarfRegNum<[61]>; def F30 : FPR<30, "F30">, DwarfRegNum<[62]>; def F31 : FPR<31, "F31">, DwarfRegNum<[63]>; - + /// Mips Double point precision FPU Registers (aliased /// with the single precision to hold 64 bit values) def D0 : AFPR< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>; @@ -149,11 +149,11 @@ let Namespace = "Mips" in { // Register Classes //===----------------------------------------------------------------------===// -def CPURegs : RegisterClass<"Mips", [i32], 32, +def CPURegs : RegisterClass<"Mips", [i32], 32, // Return Values and Arguments [V0, V1, A0, A1, A2, A3, // Not preserved across procedure calls - T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, // Callee save S0, S1, S2, S3, S4, S5, S6, S7, // Reserved @@ -173,16 +173,16 @@ def CPURegs : RegisterClass<"Mips", [i32], 32, // 64bit fp: // * FGR64 - 32 64-bit registers -// * AFGR64 - 16 32-bit even registers (32-bit FP Mode) +// * AFGR64 - 16 32-bit even registers (32-bit FP Mode) // // 32bit fp: // * FGR32 - 16 32-bit even registers // * FGR32 - 32 32-bit registers (single float only mode) -def FGR32 : RegisterClass<"Mips", [f32], 32, +def FGR32 : RegisterClass<"Mips", [f32], 32, // Return Values and Arguments [F0, F1, F2, F3, F12, F13, F14, F15, // Not preserved across procedure calls - F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19, + F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19, // Callee save F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, // Reserved @@ -195,17 +195,17 @@ def FGR32 : RegisterClass<"Mips", [f32], 32, let MethodBodies = [{ static const 
unsigned MIPS_FGR32[] = { - Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13, - Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7, - Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17, - Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23, - Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, + Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13, + Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7, + Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17, + Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23, + Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30 }; static const unsigned MIPS_SVR4_FGR32[] = { - Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4, - Mips::F6, Mips::F8, Mips::F10, Mips::F16, Mips::F18, + Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4, + Mips::F6, Mips::F8, Mips::F10, Mips::F16, Mips::F18, Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, }; @@ -217,7 +217,7 @@ def FGR32 : RegisterClass<"Mips", [f32], 32, if (Subtarget.isSingleFloat()) return MIPS_FGR32; else - return MIPS_SVR4_FGR32; + return MIPS_SVR4_FGR32; } FGR32Class::iterator @@ -233,11 +233,11 @@ def FGR32 : RegisterClass<"Mips", [f32], 32, }]; } -def AFGR64 : RegisterClass<"Mips", [f64], 64, +def AFGR64 : RegisterClass<"Mips", [f64], 64, // Return Values and Arguments [D0, D1, D6, D7, // Not preserved across procedure calls - D2, D3, D4, D5, D8, D9, + D2, D3, D4, D5, D8, D9, // Callee save D10, D11, D12, D13, D14, // Reserved diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 49ca5d1..00be8ee 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -14,7 +14,7 @@ def ALU : FuncUnit; def IMULDIV : FuncUnit; //===----------------------------------------------------------------------===// -// Instruction Itinerary classes used for Mips +// Instruction Itinerary classes used for Mips //===----------------------------------------------------------------------===// def IIAlu : InstrItinClass; def IILoad : InstrItinClass; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index db114da..70747f5d 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -17,7 +17,7 @@ using namespace llvm; MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS, - bool little) : + bool little) : MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), @@ -33,7 +33,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS, if (TT.find("linux") == std::string::npos) IsLinux = false; - // When only the target triple is specified and is + // an allegrex target, set the features.
We also match // big and little endian allegrex cores (don't really // know if a big one exists) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index e4f4b33..096bbed 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -26,7 +26,7 @@ class MipsSubtarget : public TargetSubtarget { public: enum MipsABIEnum { O32, O64, N32, N64, EABI - }; + }; protected: @@ -34,10 +34,10 @@ protected: Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2 }; - // Mips architecture version + // Mips architecture version MipsArchEnum MipsArchVersion; - // Mips supported ABIs + // Mips supported ABIs MipsABIEnum MipsABI; // IsLittle - The target is Little Endian @@ -61,14 +61,14 @@ protected: bool IsLinux; /// Features related to the presence of specific instructions. - + // HasSEInReg - SEB and SEH (signext in register) instructions. bool HasSEInReg; // HasCondMov - Conditional mov (MOVZ, MOVN) instructions. bool HasCondMov; - // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu) + // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu) // instructions. bool HasMulDivAdd; @@ -93,14 +93,14 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const std::string &TT, const std::string &FS, bool little); - - /// ParseSubtargetFeatures - Parses features string setting specified + + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); bool isMips1() const { return MipsArchVersion == Mips1; } - bool isMips32() const { return MipsArchVersion >= Mips32; } + bool isMips32() const { return MipsArchVersion >= Mips32; } bool isMips32r2() const { return MipsArchVersion == Mips32r2; } bool isLittle() const { return IsLittle; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 7a2dd1f..53190b4 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -75,3 +75,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) PM.add(createMipsDelaySlotFillerPass(*this)); return true; } + +bool MipsTargetMachine:: +addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createMipsExpandPseudoPass(*this)); + return true; +} diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 43ab798..badb652 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -63,6 +63,7 @@ namespace llvm { CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level); }; /// MipselTargetMachine - Mipsel target machine. diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 237b160..c394a9d 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -18,22 +18,22 @@ namespace llvm { const MCSection *SmallDataSection; const MCSection *SmallBSSSection; public: - + void Initialize(MCContext &Ctx, const TargetMachine &TM); - + /// IsGlobalInSmallSection - Return true if this global address should be /// placed into small data/bss section.
bool IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, SectionKind Kind)const; bool IsGlobalInSmallSection(const GlobalValue *GV, - const TargetMachine &TM) const; - + const TargetMachine &TM) const; + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const; - + // TODO: Classify globals as mips wishes. }; } // end namespace llvm diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp index cc3d61e..a8d6fe9 100644 --- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp +++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp @@ -14,7 +14,7 @@ using namespace llvm; Target llvm::TheMipsTarget, llvm::TheMipselTarget; -extern "C" void LLVMInitializeMipsTargetInfo() { +extern "C" void LLVMInitializeMipsTargetInfo() { RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips"); RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel"); diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h index 19385ba..ec2be92 100644 --- a/lib/Target/PTX/PTX.h +++ b/lib/Target/PTX/PTX.h @@ -29,6 +29,11 @@ namespace llvm { PARAMETER = 3, SHARED = 4 }; + + enum Predicate { + PRED_NORMAL = 0, + PRED_NEGATE = 1 + }; } // namespace PTX FunctionPass *createPTXISelDag(PTXTargetMachine &TM, @@ -37,7 +42,8 @@ namespace llvm { FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); - extern Target ThePTXTarget; + extern Target ThePTX32Target; + extern Target ThePTX64Target; } // namespace llvm; // Defines symbolic names for PTX registers. diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td index 8b1a1b1..ae8326e 100644 --- a/lib/Target/PTX/PTX.td +++ b/lib/Target/PTX/PTX.td @@ -19,8 +19,35 @@ include "llvm/Target/Target.td" // Subtarget Features. //===----------------------------------------------------------------------===// -def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true", - "Enable sm_20 target architecture">; +//===- Architectural Features ---------------------------------------------===// + +def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true", + "Do not demote .f64 to .f32">; + +//===- PTX Version --------------------------------------------------------===// + +def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", + "Use PTX Language Version 2.0", + []>; + +def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", + "Use PTX Language Version 2.1", + [FeaturePTX20]>; + +def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", + "Use PTX Language Version 2.2", + [FeaturePTX21]>; + +//===- PTX Shader Model ---------------------------------------------------===// + +def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0", + "Enable Shader Model 1.0 compliance">; +def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3", + "Enable Shader Model 1.3 compliance", + [FeatureSM10, FeatureDouble]>; +def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0", + "Enable Shader Model 2.0 compliance", + [FeatureSM13]>; //===----------------------------------------------------------------------===// // PTX supported processors. 
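The chained SubtargetFeature definitions above make the PTX version and shader-model features cumulative: requesting ptx22 implies ptx21 and ptx20, and sm20 implies sm13, which in turn implies sm10 and FeatureDouble. Below is a minimal C++ sketch of how the PTXSubtarget accessors used later in this patch (supportsPTX21(), supportsSM13(), supportsDouble(), and friends) can sit on top of the tblgen-populated enums; the method bodies are an illustrative assumption, not code from the patch:

  // Enum spellings taken from the .td above; values are set by the
  // tblgen'd ParseSubtargetFeatures() from -mattr strings such as
  // "+ptx21,+sm13".
  enum PTXVersionEnum { PTX_VERSION_2_0, PTX_VERSION_2_1, PTX_VERSION_2_2 };
  enum PTXShaderModelEnum { PTX_SM_1_0, PTX_SM_1_3, PTX_SM_2_0 };

  struct PTXSubtargetSketch {
    PTXVersionEnum PTXVersion;
    PTXShaderModelEnum PTXShaderModel;
    bool SupportsDouble;

    // Because each feature implies all earlier ones, an ordered
    // comparison on the enum value is sufficient.
    bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
    bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
    bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
    bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
    bool supportsDouble() const { return SupportsDouble; }
  };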
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index a605997..29c4781 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" @@ -37,13 +38,6 @@ using namespace llvm; -static cl::opt<std::string> -OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4")); - -static cl::opt<std::string> -OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"), - cl::init("sm_10")); - namespace { class PTXAsmPrinter : public AsmPrinter { public: @@ -68,6 +62,7 @@ public: const char *Modifier = 0); void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, const char *Modifier = 0); + void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); // autogen'd. void printInstruction(const MachineInstr *MI, raw_ostream &OS); @@ -82,27 +77,20 @@ private: static const char PARAM_PREFIX[] = "__param_"; static const char *getRegisterTypeName(unsigned RegNo) { -#define TEST_REGCLS(cls, clsstr) \ +#define TEST_REGCLS(cls, clsstr) \ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; - TEST_REGCLS(RRegs32, s32); TEST_REGCLS(Preds, pred); + TEST_REGCLS(RRegu16, u16); + TEST_REGCLS(RRegu32, u32); + TEST_REGCLS(RRegu64, u64); + TEST_REGCLS(RRegf32, f32); + TEST_REGCLS(RRegf64, f64); #undef TEST_REGCLS llvm_unreachable("Not in any register class!"); return NULL; } -static const char *getInstructionTypeName(const MachineInstr *MI) { - for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.getType() == MachineOperand::MO_Register) - return getRegisterTypeName(MO.getReg()); - } - - llvm_unreachable("No reg operand found in instruction!"); - return NULL; -} - static const char *getStateSpaceName(unsigned addressSpace) { switch (addressSpace) { default: llvm_unreachable("Unknown state space"); @@ -115,6 +103,28 @@ static const char *getStateSpaceName(unsigned addressSpace) { return NULL; } +static const char *getTypeName(const Type* type) { + while (true) { + switch (type->getTypeID()) { + default: llvm_unreachable("Unknown type"); + case Type::FloatTyID: return ".f32"; + case Type::DoubleTyID: return ".f64"; + case Type::IntegerTyID: + switch (type->getPrimitiveSizeInBits()) { + default: llvm_unreachable("Unknown integer bit-width"); + case 16: return ".u16"; + case 32: return ".u32"; + case 64: return ".u64"; + } + case Type::ArrayTyID: + case Type::PointerTyID: + type = dyn_cast<const SequentialType>(type)->getElementType(); + break; + } + } + return NULL; +} + bool PTXAsmPrinter::doFinalization(Module &M) { // XXX Temporarily remove global variables so that doFinalization() will not // emit them again (global variables are emitted at beginning). @@ -146,8 +156,12 @@ bool PTXAsmPrinter::doFinalization(Module &M) { void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) { - OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion)); - OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget)); + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + + OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString())); + OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + + (ST.supportsDouble() ?
"" + : ", map_f64_to_f32"))); OutStreamer.AddBlankLine(); // declare global variables @@ -186,17 +200,16 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { std::string str; str.reserve(64); - // Write instruction to str raw_string_ostream OS(str); + + // Emit predicate + printPredicateOperand(MI, OS); + + // Write instruction to str printInstruction(MI, OS); OS << ';'; OS.flush(); - // Replace "%type" if found - size_t pos; - if ((pos = str.find("%type")) != std::string::npos) - str.replace(pos, /*strlen("%type")==*/5, getInstructionTypeName(MI)); - StringRef strref = StringRef(str); OutStreamer.EmitRawText(strref); } @@ -213,11 +226,36 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, OS << *Mang->getSymbol(MO.getGlobal()); break; case MachineOperand::MO_Immediate: - OS << (int) MO.getImm(); + OS << (long) MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + OS << *MO.getMBB()->getSymbol(); break; case MachineOperand::MO_Register: OS << getRegisterName(MO.getReg()); break; + case MachineOperand::MO_FPImmediate: + APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt(); + bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID; + // Emit 0F for 32-bit floats and 0D for 64-bit doubles. + if (isFloat) { + OS << "0F"; + } + else { + OS << "0D"; + } + // Emit the encoded floating-point value. + if (constFP.getZExtValue() > 0) { + OS << constFP.toString(16, false); + } + else { + OS << "00000000"; + // If We have a double-precision zero, pad to 8-bytes. + if (!isFloat) { + OS << "00000000"; + } + } + break; } } @@ -265,13 +303,77 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { decl += " "; } - // TODO: add types - decl += ".s32 "; - decl += gvsym->getName(); + if (PointerType::classof(gv->getType())) { + const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType()); + const Type* elementTy = pointerTy->getElementType(); + + decl += ".b8 "; + decl += gvsym->getName(); + decl += "["; + + if (elementTy->isArrayTy()) + { + assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); - if (ArrayType::classof(gv->getType()) || PointerType::classof(gv->getType())) - decl += "[]"; + const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy); + elementTy = arrayTy->getElementType(); + + unsigned numElements = arrayTy->getNumElements(); + + while (elementTy->isArrayTy()) { + + arrayTy = dyn_cast<const ArrayType>(elementTy); + elementTy = arrayTy->getElementType(); + + numElements *= arrayTy->getNumElements(); + } + + // FIXME: isPrimitiveType() == false for i16? + assert(elementTy->isSingleValueType() && + "Non-primitive types are not handled"); + + // Compute the size of the array, in bytes. + uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3) + * numElements; + + decl += utostr(arraySize); + } + + decl += "]"; + + // handle string constants (assume ConstantArray means string) + + if (gv->hasInitializer()) + { + Constant *C = gv->getInitializer(); + if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) + { + decl += " = {"; + + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) + { + if (i > 0) decl += ","; + + decl += "0x" + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); + } + + decl += "}"; + } + } + } + else { + // Note: this is currently the fall-through case and most likely generates + // incorrect code. 
+ decl += getTypeName(gv->getType()); + decl += " "; + + decl += gvsym->getName(); + + if (ArrayType::classof(gv->getType()) || + PointerType::classof(gv->getType())) + decl += "[]"; + } decl += ";"; @@ -313,16 +415,24 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { if (!MFI->argRegEmpty()) { decl += " ("; if (isKernel) { - for (int i = 0, e = MFI->getNumArg(); i != e; ++i) { - if (i != 0) + unsigned cnt = 0; + for(PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + i != e; ++i) { + reg = *i; + assert(reg != PTX::NoRegister && "Not a valid register!"); + if (i != b) decl += ", "; - decl += ".param .s32 "; // TODO: add types + decl += ".param ."; + decl += getRegisterTypeName(reg); + decl += " "; decl += PARAM_PREFIX; - decl += utostr(i + 1); + decl += utostr(++cnt); } } else { for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) { + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + i != e; ++i) { reg = *i; assert(reg != PTX::NoRegister && "Not a valid register!"); if (i != b) @@ -339,9 +449,29 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { OutStreamer.EmitRawText(Twine(decl)); } +void PTXAsmPrinter:: +printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { + int i = MI->findFirstPredOperandIdx(); + if (i == -1) + llvm_unreachable("missing predicate operand"); + + unsigned reg = MI->getOperand(i).getReg(); + int predOp = MI->getOperand(i+1).getImm(); + + DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n"); + + if (reg != PTX::NoRegister) { + O << '@'; + if (predOp == PTX::PRED_NEGATE) + O << '!'; + O << getRegisterName(reg); + } +} + #include "PTXGenAsmWriter.inc" // Force static initialization. extern "C" void LLVMInitializePTXAsmPrinter() { - RegisterAsmPrinter<PTXAsmPrinter> X(ThePTXTarget); + RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target); + RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target); } diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h index 574ae7a..9320676 100644 --- a/lib/Target/PTX/PTXFrameLowering.h +++ b/lib/Target/PTX/PTXFrameLowering.h @@ -27,7 +27,8 @@ protected: public: explicit PTXFrameLowering(const PTXSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) { + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), + STI(sti) { } /// emitProlog/emitEpilog - These methods insert prolog and epilog code into diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp index efb0e8b..b3c85da 100644 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "PTXTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -42,8 +43,14 @@ class PTXDAGToDAGISel : public SelectionDAGISel { private: SDNode *SelectREAD_PARAM(SDNode *Node); + // We need this only because we can't match instruction BRAdp + // pattern (PTXbrcond bb:$d, ...)
in PTXInstrInfo.td + SDNode *SelectBRCOND(SDNode *Node); + bool isImm(const SDValue &operand); bool SelectImm(const SDValue &operand, SDValue &imm); + + const PTXSubtarget& getSubtarget() const; }; // class PTXDAGToDAGISel } // namespace @@ -59,21 +66,62 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, : SelectionDAGISel(TM, OptLevel) {} SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { - if (Node->getOpcode() == PTXISD::READ_PARAM) - return SelectREAD_PARAM(Node); - else - return SelectCode(Node); + switch (Node->getOpcode()) { + case PTXISD::READ_PARAM: + return SelectREAD_PARAM(Node); + case ISD::BRCOND: + return SelectBRCOND(Node); + default: + return SelectCode(Node); + } } SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) { - SDValue index = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); + SDValue index = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + unsigned opcode; if (index.getOpcode() != ISD::TargetConstant) llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant"); + if (Node->getValueType(0) == MVT::i16) { + opcode = PTX::LDpiU16; + } + else if (Node->getValueType(0) == MVT::i32) { + opcode = PTX::LDpiU32; + } + else if (Node->getValueType(0) == MVT::i64) { + opcode = PTX::LDpiU64; + } + else if (Node->getValueType(0) == MVT::f32) { + opcode = PTX::LDpiF32; + } + else if (Node->getValueType(0) == MVT::f64) { + opcode = PTX::LDpiF64; + } + else { + llvm_unreachable("Unknown parameter type for ld.param"); + } + return PTXInstrInfo:: - GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index); + GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index); +} + +SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { + assert(Node->getNumOperands() >= 3); + + SDValue Chain = Node->getOperand(0); + SDValue Pred = Node->getOperand(1); + SDValue Target = Node->getOperand(2); // branch target + SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + assert(Target.getOpcode() == ISD::BasicBlock); + assert(Pred.getValueType() == MVT::i1); + + // Emit BRAdp + SDValue Ops[] = { Target, Pred, PredOp, Chain }; + return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4); } // Match memory operand of the form [reg+reg] @@ -82,8 +130,11 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1))) return false; + assert(Addr.getValueType().isSimple() && "Type must be simple"); + R1 = Addr; - R2 = CurDAG->getTargetConstant(0, MVT::i32); + R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + return true; } @@ -95,8 +146,12 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, if (isImm(Addr)) return false; // it is [reg] + + assert(Addr.getValueType().isSimple() && "Type must be simple"); + Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + return true; } @@ -129,7 +184,10 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, // is [imm]? 
if (SelectImm(Addr, Base)) { - Offset = CurDAG->getTargetConstant(0, MVT::i32); + assert(Addr.getValueType().isSimple() && "Type must be simple"); + + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + return true; } @@ -146,6 +204,13 @@ bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) { return false; ConstantSDNode *CN = cast<ConstantSDNode>(node); - imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), MVT::i32); + imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), + operand.getValueType()); return true; } + +const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const +{ + return TM.getSubtarget<PTXSubtarget>(); +} + diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index e6d4490..23b93da 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -27,21 +28,60 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. addRegisterClass(MVT::i1, PTX::PredsRegisterClass); - addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass); - + addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass); + addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass); + addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass); + addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass); + addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass); + + setBooleanContents(ZeroOrOneBooleanContent); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + // Turn i16 (z)extload into load + (z)extend + setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); + + // Turn f32 extload into load + fextend + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // Turn f64 truncstore into trunc + store. 
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // Customize translation of memory addresses setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + + // Expand BR_CC into BRCOND + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + + // Expand SELECT_CC into SETCC + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + + // need to lower SETCC of Preds into bitwise logic + setOperationAction(ISD::SETCC, MVT::i1, Custom); + // Compute derived properties from the register classes computeRegisterProperties(); } +MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const { + return MVT::i1; +} + SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - default: llvm_unreachable("Unimplemented operand"); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + default: + llvm_unreachable("Unimplemented operand"); + case ISD::SETCC: + return LowerSETCC(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); } } @@ -49,6 +89,8 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: llvm_unreachable("Unknown opcode"); + case PTXISD::COPY_ADDRESS: + return "PTXISD::COPY_ADDRESS"; case PTXISD::READ_PARAM: return "PTXISD::READ_PARAM"; case PTXISD::EXIT: @@ -62,12 +104,43 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { // Custom Lower Operation //===----------------------------------------------------------------------===// +SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer"); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + + // Look for X == 0, X == 1, X != 0, or X != 1 + // We can simplify these to bitwise logic + + if (Op1.getOpcode() == ISD::Constant && + (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + cast<ConstantSDNode>(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + + return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); + } + + return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); +} + SDValue PTXTargetLowering:: LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - return DAG.getTargetGlobalAddress(GV, dl, PtrVT); + + assert(PtrVT.isSimple() && "Pointer must be to primitive type."); + + SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT); + SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS, + dl, + PtrVT.getSimpleVT(), + targetGlobal); + + return movInstr; } //===----------------------------------------------------------------------===// @@ -87,9 +160,13 @@ struct argmap_entry { bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; } } argmap[] = { argmap_entry(MVT::i1, PTX::PredsRegisterClass), - argmap_entry(MVT::i32, PTX::RRegs32RegisterClass) + argmap_entry(MVT::i16, PTX::RRegu16RegisterClass), + argmap_entry(MVT::i32, PTX::RRegu32RegisterClass), + argmap_entry(MVT::i64, PTX::RRegu64RegisterClass), + argmap_entry(MVT::f32, PTX::RRegf32RegisterClass), + argmap_entry(MVT::f64, 
PTX::RRegf64RegisterClass) }; -} // end anonymous namespace +} // end anonymous namespace SDValue PTXTargetLowering:: LowerFormalArguments(SDValue Chain, @@ -185,10 +262,25 @@ SDValue PTXTargetLowering:: if (Outs.size() == 0) return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); - assert(Outs[0].VT == MVT::i32 && "Can return only basic types"); - SDValue Flag; - unsigned reg = PTX::R0; + unsigned reg; + + if (Outs[0].VT == MVT::i16) { + reg = PTX::RH0; + } + else if (Outs[0].VT == MVT::i32) { + reg = PTX::R0; + } + else if (Outs[0].VT == MVT::i64) { + reg = PTX::RD0; + } + else if (Outs[0].VT == MVT::f32) { + reg = PTX::F0; + } + else { + assert(Outs[0].VT == MVT::f64 && "Can return only basic types"); + reg = PTX::FD0; + } MachineFunction &MF = DAG.getMachineFunction(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h index b03a9f6..6a7e3e6 100644 --- a/lib/Target/PTX/PTXISelLowering.h +++ b/lib/Target/PTX/PTXISelLowering.h @@ -26,7 +26,8 @@ namespace PTXISD { FIRST_NUMBER = ISD::BUILTIN_OP_END, READ_PARAM, EXIT, - RET + RET, + COPY_ADDRESS }; } // namespace PTXISD @@ -41,6 +42,8 @@ class PTXTargetLowering : public TargetLowering { virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, @@ -58,7 +61,9 @@ class PTXTargetLowering : public TargetLowering { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - + + virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + private: SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; }; // class PTXTargetLowering diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 805759b..a12a6d0 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -11,9 +11,15 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "ptx-instrinfo" + #include "PTX.h" #include "PTXInstrInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -27,20 +33,27 @@ static const struct map_entry { const TargetRegisterClass *cls; const int opcode; } map[] = { - { &PTX::RRegs32RegClass, PTX::MOVrr }, - { &PTX::PredsRegClass, PTX::MOVpp } + { &PTX::RRegu16RegClass, PTX::MOVU16rr }, + { &PTX::RRegu32RegClass, PTX::MOVU32rr }, + { &PTX::RRegu64RegClass, PTX::MOVU64rr }, + { &PTX::RRegf32RegClass, PTX::MOVF32rr }, + { &PTX::RRegf64RegClass, PTX::MOVF64rr }, + { &PTX::PredsRegClass, PTX::MOVPREDrr } }; void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DstReg, unsigned SrcReg, bool KillSrc) const { - for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) - if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) { - BuildMI(MBB, I, DL, - get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc)); + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { + if (map[i].cls->contains(DstReg, SrcReg)) { + const TargetInstrDesc &TID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg). 
+ addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPredicate(MI); return; } + } llvm_unreachable("Impossible reg-to-reg copy"); } @@ -56,12 +69,9 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) if (DstRC == map[i].cls) { - MachineInstr *MI = BuildMI(MBB, I, DL, get(map[i].opcode), - DstReg).addReg(SrcReg); - if (MI->findFirstPredOperandIdx() == -1) { - MI->addOperand(MachineOperand::CreateReg(0, false)); - MI->addOperand(MachineOperand::CreateImm(/*IsInv=*/0)); - } + const TargetInstrDesc &TID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg); + AddDefaultPredicate(MI); return true; } @@ -74,8 +84,12 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, switch (MI.getOpcode()) { default: return false; - case PTX::MOVpp: - case PTX::MOVrr: + case PTX::MOVU16rr: + case PTX::MOVU32rr: + case PTX::MOVU64rr: + case PTX::MOVF32rr: + case PTX::MOVF64rr: + case PTX::MOVPREDrr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && "Invalid register-register move instruction"); @@ -85,3 +99,239 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, return true; } } + +// predicate support + +bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const { + int i = MI->findFirstPredOperandIdx(); + return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister; +} + +bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + return !isPredicated(MI) && get(MI->getOpcode()).isTerminator(); +} + +bool PTXInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + if (Pred.size() < 2) + llvm_unreachable("fewer than 2 predicate operands provided"); + + int i = MI->findFirstPredOperandIdx(); + if (i == -1) + llvm_unreachable("missing predicate operand"); + + MI->getOperand(i).setReg(Pred[0].getReg()); + MI->getOperand(i+1).setImm(Pred[1].getImm()); + + return true; +} + +bool PTXInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + const MachineOperand &PredReg1 = Pred1[0]; + const MachineOperand &PredReg2 = Pred2[0]; + if (PredReg1.getReg() != PredReg2.getReg()) + return false; + + const MachineOperand &PredOp1 = Pred1[1]; + const MachineOperand &PredOp2 = Pred2[1]; + if (PredOp1.getImm() != PredOp2.getImm()) + return false; + + return true; +} + +bool PTXInstrInfo:: +DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + // If an instruction sets a predicate register, it defines a predicate.
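All of these hooks traffic in the two-operand predicate convention this patch introduces: a predicate register (or PTX::NoRegister) followed by a PRED_NORMAL/PRED_NEGATE immediate, which printPredicateOperand renders as an optional "@" or "@!" guard. A self-contained sketch of that rendering, using stand-in values rather than the real MachineOperand API:

  #include <cstdio>

  enum Predicate { PRED_NORMAL = 0, PRED_NEGATE = 1 };  // mirrors PTX.h

  // Print an instruction with its guard; 0 stands in for PTX::NoRegister.
  static void printGuarded(unsigned PredReg, int PredOp, const char *Inst) {
    if (PredReg != 0) {
      std::printf("@");
      if (PredOp == PRED_NEGATE)   // execute only when the predicate is false
        std::printf("!");
      std::printf("%%p%u ", PredReg);
    }
    std::printf("%s\n", Inst);
  }

  int main() {
    printGuarded(0, PRED_NORMAL, "add.u32 %r0, %r1, %r2;"); // unpredicated
    printGuarded(1, PRED_NEGATE, "bra $L_end;");            // "@!%p1 bra $L_end;"
    return 0;
  }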
+ + // TODO: support 5-operand format of setp instruction + + if (MI->getNumOperands() < 1) + return false; + + const MachineOperand &MO = MI->getOperand(0); + + if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass) + return false; + + Pred.push_back(MO); + Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + return true; +} + +// branch support + +bool PTXInstrInfo:: +AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // TODO implement cases when AllowModify is true + + if (MBB.empty()) + return true; + + MachineBasicBlock::const_iterator iter = MBB.end(); + const MachineInstr& instLast1 = *--iter; + const TargetInstrDesc &desc1 = instLast1.getDesc(); + // for the special case that MBB has only 1 instruction + const bool IsSizeOne = MBB.size() == 1; + // if IsSizeOne is true, *--iter and instLast2 are invalid, so we put + // dummy values in instLast2 and desc2 since they are still referenced + const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; + const TargetInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc(); + + DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n"); + + // this block ends with no branches + if (!IsAnyKindOfBranch(instLast1)) { + DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n"); + return false; + } + + // this block ends with only an unconditional branch + if (desc1.isUnconditionalBranch() && + // when IsSizeOne is true, it "absorbs" the evaluation of instLast2 + (IsSizeOne || !IsAnyKindOfBranch(instLast2))) { + DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n"); + TBB = GetBranchTarget(instLast1); + return false; + } + + // this block ends with a conditional branch and + // it falls through to a successor block + if (desc1.isConditionalBranch() && + IsAnySuccessorAlsoLayoutSuccessor(MBB)) { + DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n"); + TBB = GetBranchTarget(instLast1); + int i = instLast1.findFirstPredOperandIdx(); + Cond.push_back(instLast1.getOperand(i)); + Cond.push_back(instLast1.getOperand(i+1)); + return false; + } + + // when IsSizeOne is true, we are done + if (IsSizeOne) + return true; + + // this block ends with a conditional branch + // followed by an unconditional branch + if (desc2.isConditionalBranch() && + desc1.isUnconditionalBranch()) { + DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n"); + TBB = GetBranchTarget(instLast2); + FBB = GetBranchTarget(instLast1); + int i = instLast2.findFirstPredOperandIdx(); + Cond.push_back(instLast2.getOperand(i)); + Cond.push_back(instLast2.getOperand(i+1)); + return false; + } + + // branch cannot be understood + DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n"); + return true; +} + +unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + unsigned count = 0; + while (!MBB.empty()) + if (IsAnyKindOfBranch(MBB.back())) { + MBB.pop_back(); + ++count; + } else + break; + DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n"); + return count; +} + +unsigned PTXInstrInfo:: +InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const
SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str() + << "\n"; + else dbgs() << "InsertBranch: TBB: (NULL)\n"); + DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str() + << "\n"; + else dbgs() << "InsertBranch: FBB: (NULL)\n"); + DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n"); + + assert(TBB && "TBB is NULL"); + + if (FBB) { + BuildMI(&MBB, DL, get(PTX::BRAdp)) + .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); + BuildMI(&MBB, DL, get(PTX::BRAd)) + .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + return 2; + } else if (Cond.size()) { + BuildMI(&MBB, DL, get(PTX::BRAdp)) + .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); + return 1; + } else { + BuildMI(&MBB, DL, get(PTX::BRAd)) + .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + return 1; + } +} + +// static helper routines + +MachineSDNode *PTXInstrInfo:: +GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, SDValue Op1) { + SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue ops[] = { Op1, predReg, predOp }; + return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); +} + +MachineSDNode *PTXInstrInfo:: +GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { + SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue ops[] = { Op1, Op2, predReg, predOp }; + return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); +} + +void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { + if (MI->findFirstPredOperandIdx() == -1) { + MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); + MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + } +} + +bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { + const TargetInstrDesc &desc = inst.getDesc(); + return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch(); +} + +bool PTXInstrInfo:: +IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) { + for (MachineBasicBlock::const_succ_iterator + i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i) + if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i)) + return true; + return false; +} + +MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) { + // FIXME So far all branch instructions put destination in 1st operand + const MachineOperand& target = inst.getOperand(0); + assert(target.isMBB() && "FIXME: detect branch target operand"); + return target.getMBB(); +} diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h index e7f00f0..a04be77 100644 --- a/lib/Target/PTX/PTXInstrInfo.h +++ b/lib/Target/PTX/PTXInstrInfo.h @@ -15,61 +15,93 @@ #define PTX_INSTR_INFO_H #include "PTXRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { class PTXTargetMachine; +class MachineSDNode; +class SDValue; +class SelectionDAG; + class PTXInstrInfo : public TargetInstrInfoImpl { - private: - const PTXRegisterInfo RI; - PTXTargetMachine &TM; - - public: - explicit PTXInstrInfo(PTXTargetMachine &_TM); - - virtual const PTXRegisterInfo 
&getRegisterInfo() const { return RI; } - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, unsigned SrcReg, - bool KillSrc) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, unsigned SrcReg, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; - - virtual bool isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - - // static helper routines - - static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, - SDValue Op1) { - SDValue pred_reg = DAG->getRegister(0, MVT::i1); - SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32); - SDValue ops[] = { Op1, pred_reg, pred_imm }; - return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); - } - - static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, - SDValue Op1, - SDValue Op2) { - SDValue pred_reg = DAG->getRegister(0, MVT::i1); - SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32); - SDValue ops[] = { Op1, Op2, pred_reg, pred_imm }; - return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); - } - - }; // class PTXInstrInfo +private: + const PTXRegisterInfo RI; + PTXTargetMachine &TM; + +public: + explicit PTXInstrInfo(PTXTargetMachine &_TM); + + virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; } + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, unsigned SrcReg, + bool KillSrc) const; + + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; + + virtual bool isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + // predicate support + + virtual bool isPredicated(const MachineInstr *MI) const; + + virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; + + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + + // PTX is fully-predicable + virtual bool isPredicable(MachineInstr *MI) const { return true; } + + // branch support + + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const; + + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + + // static helper routines + + static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, + SDValue Op1); + + static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2); + + static void AddDefaultPredicate(MachineInstr *MI); + + static bool IsAnyKindOfBranch(const MachineInstr& inst); + + static bool 
IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB); + + static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst); +}; // class PTXInstrInfo } // namespace llvm #endif // PTX_INSTR_INFO_H diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 9a74778..1ac9d3f 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -18,6 +18,26 @@ include "PTXInstrFormats.td" //===----------------------------------------------------------------------===// +// Code Generation Predicates +//===----------------------------------------------------------------------===// + +// Addressing +def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; +def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; + +// Shader Model Support +def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">; +def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">; +def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">; +def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">; + +// PTX Version Support +def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; +def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">; +def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">; +def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">; + +//===----------------------------------------------------------------------===// // Instruction Pattern Stuff //===----------------------------------------------------------------------===// @@ -107,24 +127,41 @@ def store_shared }]>; // Addressing modes. -def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>; -def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>; +def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; +def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; +def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; +def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; +def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; +def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; // Address operands -def MEMri : Operand<i32> { +def MEMri32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RRegu32, i32imm); +} +def MEMri64 : Operand<i64> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RRegs32, i32imm); + let MIOperandInfo = (ops RRegu64, i64imm); } -def MEMii : Operand<i32> { +def MEMii32 : Operand<i32> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops i32imm, i32imm); } +def MEMii64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +// The operand here does not correspond to an actual address, so we +// can use i32 in 64-bit address modes. def MEMpi : Operand<i32> { let PrintMethod = "printParamOperand"; let MIOperandInfo = (ops i32imm); } +// Branch & call targets have OtherVT type. 
+def brtarget : Operand<OtherVT>; +def calltarget : Operand<i32>; + //===----------------------------------------------------------------------===// // PTX Specific Node Definitions //===----------------------------------------------------------------------===// @@ -138,66 +175,389 @@ def PTXexit : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>; def PTXret : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>; +def PTXcopyaddress + : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// +//===- Floating-Point Instructions - 2 Operand Form -----------------------===// +multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> { + def rr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a), + !strconcat(opcstr, ".f32\t$d, $a"), + [(set RRegf32:$d, (opnode RRegf32:$a))]>; + def ri32 : InstPTX<(outs RRegf32:$d), + (ins f32imm:$a), + !strconcat(opcstr, ".f32\t$d, $a"), + [(set RRegf32:$d, (opnode fpimm:$a))]>; + def rr64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a), + !strconcat(opcstr, ".f64\t$d, $a"), + [(set RRegf64:$d, (opnode RRegf64:$a))]>; + def ri64 : InstPTX<(outs RRegf64:$d), + (ins f64imm:$a), + !strconcat(opcstr, ".f64\t$d, $a"), + [(set RRegf64:$d, (opnode fpimm:$a))]>; +} + +//===- Floating-Point Instructions - 3 Operand Form -----------------------===// +multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> { + def rr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b), + !strconcat(opcstr, ".f32\t$d, $a, $b"), + [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>; + def ri32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, f32imm:$b), + !strconcat(opcstr, ".f32\t$d, $a, $b"), + [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>; + def rr64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b), + !strconcat(opcstr, ".f64\t$d, $a, $b"), + [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>; + def ri64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, f64imm:$b), + !strconcat(opcstr, ".f64\t$d, $a, $b"), + [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>; +} + +//===- Floating-Point Instructions - 4 Operand Form -----------------------===// +multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> { + def rrr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b, RRegf32:$c), + !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), + [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a, + RRegf32:$b), + RRegf32:$c))]>; + def rri32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b, f32imm:$c), + !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), + [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a, + RRegf32:$b), + fpimm:$c))]>; + def rrr64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b, RRegf64:$c), + !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), + [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a, + RRegf64:$b), + RRegf64:$c))]>; + def rri64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b, f64imm:$c), + !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), + [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a, + RRegf64:$b), + fpimm:$c))]>; +} + multiclass INT3<string opcstr, SDNode opnode> { - def rr : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, RRegs32:$b), - !strconcat(opcstr, ".%type\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>; - def ri : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, i32imm:$b), - !strconcat(opcstr, ".%type\t$d, $a, 
$b"), - [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>; + def rr16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, RRegu16:$b), + !strconcat(opcstr, ".u16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; + def ri16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, i16imm:$b), + !strconcat(opcstr, ".u16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, RRegu32:$b), + !strconcat(opcstr, ".u32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; + def ri32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, i32imm:$b), + !strconcat(opcstr, ".u32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, RRegu64:$b), + !strconcat(opcstr, ".u64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; + def ri64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, i64imm:$b), + !strconcat(opcstr, ".u64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; +} + +multiclass PTX_LOGIC<string opcstr, SDNode opnode> { + def ripreds : InstPTX<(outs Preds:$d), + (ins Preds:$a, i1imm:$b), + !strconcat(opcstr, ".pred\t$d, $a, $b"), + [(set Preds:$d, (opnode Preds:$a, imm:$b))]>; + def rrpreds : InstPTX<(outs Preds:$d), + (ins Preds:$a, Preds:$b), + !strconcat(opcstr, ".pred\t$d, $a, $b"), + [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>; + def rr16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, RRegu16:$b), + !strconcat(opcstr, ".b16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; + def ri16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, i16imm:$b), + !strconcat(opcstr, ".b16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, RRegu32:$b), + !strconcat(opcstr, ".b32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; + def ri32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, i32imm:$b), + !strconcat(opcstr, ".b32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, RRegu64:$b), + !strconcat(opcstr, ".b64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; + def ri64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, i64imm:$b), + !strconcat(opcstr, ".b64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; } -// no %type directive, non-communtable multiclass INT3ntnc<string opcstr, SDNode opnode> { - def rr : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, RRegs32:$b), - !strconcat(opcstr, "\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>; - def ri : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, i32imm:$b), - !strconcat(opcstr, "\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>; - def ir : InstPTX<(outs RRegs32:$d), - (ins i32imm:$a, RRegs32:$b), - !strconcat(opcstr, "\t$d, $a, $b"), - [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>; + def rr16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, RRegu16:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; + def rr32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, RRegu32:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; + def rr64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, RRegu64:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; + def ri16 : 
InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, i16imm:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; + def ri32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, i32imm:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; + def ri64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, i64imm:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; + def ir16 : InstPTX<(outs RRegu16:$d), + (ins i16imm:$a, RRegu16:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>; + def ir32 : InstPTX<(outs RRegu32:$d), + (ins i32imm:$a, RRegu32:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>; + def ir64 : InstPTX<(outs RRegu64:$d), + (ins i64imm:$a, RRegu64:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>; } -multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> { - def rr : InstPTX<(outs RC:$d), - (ins MEMri:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), - [(set RC:$d, (pat_load ADDRrr:$a))]>; - def ri : InstPTX<(outs RC:$d), - (ins MEMri:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), - [(set RC:$d, (pat_load ADDRri:$a))]>; - def ii : InstPTX<(outs RC:$d), - (ins MEMii:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), - [(set RC:$d, (pat_load ADDRii:$a))]>; +multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, + CondCode cmp, string cmpstr> { + // TODO support 5-operand format: p|q, a, b, c + + def rr + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>; + def ri + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>; + + def rr_and_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + def ri_and_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + def rr_or_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + def ri_or_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + def rr_xor_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + def ri_xor_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + + def rr_and_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + def ri_and_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", 
regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + def rr_or_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + def ri_or_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + def rr_xor_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + def ri_xor_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; } -multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> { - def rr : InstPTX<(outs), - (ins RC:$d, MEMri:$a), - !strconcat(opstr, ".%type\t[$a], $d"), - [(pat_store RC:$d, ADDRrr:$a)]>; - def ri : InstPTX<(outs), - (ins RC:$d, MEMri:$a), - !strconcat(opstr, ".%type\t[$a], $d"), - [(pat_store RC:$d, ADDRri:$a)]>; - def ii : InstPTX<(outs), - (ins RC:$d, MEMii:$a), - !strconcat(opstr, ".%type\t[$a], $d"), - [(pat_store RC:$d, ADDRii:$a)]>; +multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, + CondCode ucmp, CondCode ocmp, string cmpstr> { + // TODO support 5-operand format: p|q, a, b, c + + def rr_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>; + def rr_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>; + + def rr_and_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_and_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_or_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_or_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_xor_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_xor_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_and_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_and_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + 
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + + def rr_or_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_or_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + + def rr_xor_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_xor_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; +} + +multiclass PTX_SELP<RegisterClass RC, string regclsname> { + def rr + : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>; +} + +multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { + def rr32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs RC:$d), + (ins MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs RC:$d), + (ins MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>; +} + +multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { + defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>; +} + +multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> { + def rr32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + 
[(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs), + (ins RC:$d, MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs), + (ins RC:$d, MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>; +} + +multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { + defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>; } //===----------------------------------------------------------------------===// @@ -208,50 +568,392 @@ multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> { defm ADD : INT3<"add", add>; defm SUB : INT3<"sub", sub>; +defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies +defm DIV : INT3<"div", udiv>; +defm REM : INT3<"rem", urem>; + +///===- Floating-Point Arithmetic Instructions ----------------------------===// + +// Standard Unary Operations +defm FNEG : PTX_FLOAT_2OP<"neg", fneg>; + +// Standard Binary Operations +defm FADD : PTX_FLOAT_3OP<"add", fadd>; +defm FSUB : PTX_FLOAT_3OP<"sub", fsub>; +defm FMUL : PTX_FLOAT_3OP<"mul", fmul>; + +// TODO: Allow user selection of rounding modes for fdiv. +// For division, we need to have f32 and f64 differently. +// For f32, we just always use .approx since it is supported on all hardware +// for PTX 1.4+, which is our minimum target. +def FDIVrr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b), + "div.approx.f32\t$d, $a, $b", + [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>; +def FDIVri32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, f32imm:$b), + "div.approx.f32\t$d, $a, $b", + [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>; + +// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0. +def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b), + "div.rn.f64\t$d, $a, $b", + [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>, + Requires<[SupportsSM13]>; +def FDIVri64SM13 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, f64imm:$b), + "div.rn.f64\t$d, $a, $b", + [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>, + Requires<[SupportsSM13]>; +def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b), + "div.f64\t$d, $a, $b", + [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>, + Requires<[DoesNotSupportSM13]>; +def FDIVri64SM10 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, f64imm:$b), + "div.f64\t$d, $a, $b", + [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>, + Requires<[DoesNotSupportSM13]>; + + + +// Multi-operation hybrid instructions + +// The selection of mad/fma is tricky. In some cases, they are the *same* +// instruction, but in other cases we may prefer one or the other. Also, +// different PTX versions differ on whether rounding mode flags are required. +// In the short term, mad is supported on all PTX versions and we use a +// default rounding mode no matter what shader model or PTX version. +// TODO: Allow the rounding mode to be selectable through llc. 
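[Editorial aside] The shader-model gating described above bottoms out in a plain subtarget query. A minimal hedged sketch of the equivalent C++ selection logic (PTXSubtarget::supportsSM13() is declared later in this commit; the two helper functions are hypothetical illustrations, not part of the patch):

#include "PTXSubtarget.h"

// Pick the f64 divide mnemonic the way Requires<[SupportsSM13]> and
// Requires<[DoesNotSupportSM13]> split FDIVrr64SM13 and FDIVrr64SM10:
// sm_13 and newer must carry an explicit rounding mode, sm_10 must not.
static const char *f64DivOpcode(const llvm::PTXSubtarget &ST) {
  return ST.supportsSM13() ? "div.rn.f64" : "div.f64";
}

// The same split applies to the multiply-add hybrids defined just below
// (FMADSM13 vs. FMAD): a default rounding mode is attached on sm_13+.
static const char *fmadOpcode(const llvm::PTXSubtarget &ST) {
  return ST.supportsSM13() ? "mad.rn" : "mad";
}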
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13]>; +defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13]>; + +///===- Floating-Point Intrinsic Instructions -----------------------------===// + +def FSQRT32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a), + "sqrt.rn.f32\t$d, $a", + [(set RRegf32:$d, (fsqrt RRegf32:$a))]>; + +def FSQRT64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a), + "sqrt.rn.f64\t$d, $a", + [(set RRegf64:$d, (fsqrt RRegf64:$a))]>; + +def FSIN32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a), + "sin.approx.f32\t$d, $a", + [(set RRegf32:$d, (fsin RRegf32:$a))]>; + +def FSIN64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a), + "sin.approx.f64\t$d, $a", + [(set RRegf64:$d, (fsin RRegf64:$a))]>; + +def FCOS32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a), + "cos.approx.f32\t$d, $a", + [(set RRegf32:$d, (fcos RRegf32:$a))]>; + +def FCOS64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a), + "cos.approx.f64\t$d, $a", + [(set RRegf64:$d, (fcos RRegf64:$a))]>; + + +///===- Comparison and Selection Instructions -----------------------------===// + +// Compare u16 + +defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">; +defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">; +defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">; +defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">; +defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">; +defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">; + +// Compare u32 + +defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">; +defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">; +defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">; +defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">; +defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">; +defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">; + +// Compare u64 + +defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">; +defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">; +defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">; +defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">; +defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">; +defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">; + +// Compare f32 + +defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">; +defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">; +defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">; +defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">; +defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">; +defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">; + +// Compare f64 + +defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">; +defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">; +defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">; +defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">; +defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">; +defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">; + +// .selp + +defm PTX_SELPu16 : PTX_SELP<RRegu16, "u16">; +defm PTX_SELPu32 : PTX_SELP<RRegu32, "u32">; +defm PTX_SELPu64 : PTX_SELP<RRegu64, "u64">; +defm PTX_SELPf32 : PTX_SELP<RRegf32, "f32">; +defm PTX_SELPf64 : PTX_SELP<RRegf64, "f64">; ///===- Logic and Shift Instructions 
--------------------------------------===// -defm SHL : INT3ntnc<"shl.b32", PTXshl>; -defm SRL : INT3ntnc<"shr.u32", PTXsrl>; -defm SRA : INT3ntnc<"shr.s32", PTXsra>; +defm SHL : INT3ntnc<"shl.b", PTXshl>; +defm SRL : INT3ntnc<"shr.u", PTXsrl>; +defm SRA : INT3ntnc<"shr.s", PTXsra>; + +defm AND : PTX_LOGIC<"and", and>; +defm OR : PTX_LOGIC<"or", or>; +defm XOR : PTX_LOGIC<"xor", xor>; ///===- Data Movement and Conversion Instructions -------------------------===// let neverHasSideEffects = 1 in { - // rely on isMoveInstr to separate MOVpp, MOVrr, etc. - def MOVpp + def MOVPREDrr : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>; - def MOVrr - : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>; + def MOVU16rr + : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>; + def MOVU32rr + : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>; + def MOVU64rr + : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>; + def MOVF32rr + : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>; + def MOVF64rr + : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { - def MOVpi + def MOVPREDri : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a", [(set Preds:$d, imm:$a)]>; - def MOVri - : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a", - [(set RRegs32:$d, imm:$a)]>; + def MOVU16ri + : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", + [(set RRegu16:$d, imm:$a)]>; + def MOVU32ri + : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RRegu32:$d, imm:$a)]>; + def MOVU64ri + : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RRegu64:$d, imm:$a)]>; + def MOVF32ri + : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", + [(set RRegf32:$d, fpimm:$a)]>; + def MOVF64ri + : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", + [(set RRegf64:$d, fpimm:$a)]>; } -defm LDg : PTX_LD<"ld.global", RRegs32, load_global>; -defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>; -defm LDl : PTX_LD<"ld.local", RRegs32, load_local>; -defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>; -defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def MOVaddr32 + : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>; + def MOVaddr64 + : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>; +} + +// Loads +defm LDg : PTX_LD_ALL<"ld.global", load_global>; +defm LDc : PTX_LD_ALL<"ld.const", load_constant>; +defm LDl : PTX_LD_ALL<"ld.local", load_local>; +defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; + +// This is a special instruction that is manually inserted for kernel parameters +def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a), + "ld.param.u16\t$d, [$a]", []>; +def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a), + "ld.param.u32\t$d, [$a]", []>; +def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a), + "ld.param.u64\t$d, [$a]", []>; +def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a), + "ld.param.f32\t$d, [$a]", []>; +def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a), + "ld.param.f64\t$d, [$a]", []>; + +// Stores +defm STg : PTX_ST_ALL<"st.global", store_global>; +defm STl : PTX_ST_ALL<"st.local", store_local>; +defm STs : PTX_ST_ALL<"st.shared", 
store_shared>; + +// defm STp : PTX_ST_ALL<"st.param", store_parameter>; +// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; +// TODO: Do something with st.param if/when it is needed. + +// Conversion to pred + +def CVT_pred_u16 + : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a", + [(set Preds:$d, (trunc RRegu16:$a))]>; -def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a), - "ld.param.%type\t$d, [$a]", []>; +def CVT_pred_u32 + : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a", + [(set Preds:$d, (trunc RRegu32:$a))]>; -defm STg : PTX_ST<"st.global", RRegs32, store_global>; -defm STl : PTX_ST<"st.local", RRegs32, store_local>; -// Store to parameter state space requires PTX 2.0 or higher? -// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>; -defm STs : PTX_ST<"st.shared", RRegs32, store_shared>; +def CVT_pred_u64 + : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a", + [(set Preds:$d, (trunc RRegu64:$a))]>; + +def CVT_pred_f32 + : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a", + [(set Preds:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_pred_f64 + : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a", + [(set Preds:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u16 + +def CVT_u16_pred + : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a", + [(set RRegu16:$d, (zext Preds:$a))]>; + +def CVT_u16_u32 + : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a", + [(set RRegu16:$d, (trunc RRegu32:$a))]>; + +def CVT_u16_u64 + : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a", + [(set RRegu16:$d, (trunc RRegu64:$a))]>; + +def CVT_u16_f32 + : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a", + [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u16_f64 + : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a", + [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u32 + +def CVT_u32_pred + : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a", + [(set RRegu32:$d, (zext Preds:$a))]>; + +def CVT_u32_u16 + : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a", + [(set RRegu32:$d, (zext RRegu16:$a))]>; + +def CVT_u32_u64 + : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a", + [(set RRegu32:$d, (trunc RRegu64:$a))]>; + +def CVT_u32_f32 + : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a", + [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u32_f64 + : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a", + [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u64 + +def CVT_u64_pred + : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a", + [(set RRegu64:$d, (zext Preds:$a))]>; + +def CVT_u64_u16 + : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a", + [(set RRegu64:$d, (zext RRegu16:$a))]>; + +def CVT_u64_u32 + : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a", + [(set RRegu64:$d, (zext RRegu32:$a))]>; + +def CVT_u64_f32 + : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a", + [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u64_f64 + : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a", + [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to f32 + +def CVT_f32_pred + : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a", + [(set RRegf32:$d, (uint_to_fp Preds:$a))]>; 
+ +def CVT_f32_u16 + : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>; + +def CVT_f32_u32 + : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>; + +def CVT_f32_u64 + : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>; + +def CVT_f32_f64 + : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a", + [(set RRegf32:$d, (fround RRegf64:$a))]>; + +// Conversion to f64 + +def CVT_f64_pred + : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a", + [(set RRegf64:$d, (uint_to_fp Preds:$a))]>; + +def CVT_f64_u16 + : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>; + +def CVT_f64_u32 + : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>; + +def CVT_f64_u64 + : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>; + +def CVT_f64_f32 + : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a", + [(set RRegf64:$d, (fextend RRegf32:$a))]>; ///===- Control Flow Instructions -----------------------------------------===// +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + def BRAd + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; +} + +let isBranch = 1, isTerminator = 1 in { + // FIXME: The pattern part is blank because I cannot (or do not yet know + // how to) use the first operand of PredicateOperand (a Preds register) here + def BRAdp + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", + [/*(brcond pred:$_p, bb:$d)*/]>; +} + let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; } + +///===- Intrinsic Instructions --------------------------------------------===// + +include "PTXIntrinsicInstrInfo.td" diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td new file mode 100644 index 0000000..320934a --- /dev/null +++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -0,0 +1,84 @@ +//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the PTX-specific intrinsic instructions. 
+// +//===----------------------------------------------------------------------===// + +// PTX Special Purpose Register Accessor Intrinsics + +class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop> + : InstPTX<(outs RRegu64:$d), (ins), + !strconcat("mov.u64\t$d, %", regname), + [(set RRegu64:$d, (intop))]>; + +class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop> + : InstPTX<(outs RRegu32:$d), (ins), + !strconcat("mov.u32\t$d, %", regname), + [(set RRegu32:$d, (intop))]>; + +// TODO Add read vector-version of special registers + +//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>; +def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>; +def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>; +def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>; +def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>; + +//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>; +def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>; +def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>; +def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>; +def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>; + +def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>; +def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>; +def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>; + +//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; +def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>; +def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>; +def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>; +def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>; + +//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; +def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>; +def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>; +def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>; +def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>; + +def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>; +def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>; +def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>; + +def PTX_READ_LANEMASK_EQ + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>; +def PTX_READ_LANEMASK_LE + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>; +def PTX_READ_LANEMASK_LT + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>; +def PTX_READ_LANEMASK_GE + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>; +def PTX_READ_LANEMASK_GT + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>; + +def PTX_READ_CLOCK + : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>; +def PTX_READ_CLOCK64 + : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>; + +def PTX_READ_PM0 : 
PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>; +def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>; +def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>; +def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>; + +// PTX Parallel Synchronization and Communication Intrinsics + +def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i", + [(int_ptx_bar_sync imm:$i)]>; diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp index 0886ba8..1574670 100644 --- a/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -143,9 +143,9 @@ public: virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace); - virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); - virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + unsigned AddrSpace); + virtual void EmitULEB128Value(const MCExpr *Value); + virtual void EmitSLEB128Value(const MCExpr *Value); virtual void EmitGPRel32Value(const MCExpr *Value); @@ -233,7 +233,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) { void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); + //assert(getCurrentSection() && "Cannot emit before setting section!"); OS << *Symbol << MAI.getLabelSuffix(); EmitEOL(); @@ -352,9 +352,8 @@ void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { } void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace) { + unsigned AddrSpace) { assert(getCurrentSection() && "Cannot emit contents before setting section!"); - assert(!isPCRel && "Cannot emit pc relative relocations!"); const char *Directive = 0; switch (Size) { default: break; @@ -383,15 +382,13 @@ void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, EmitEOL(); } -void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value, - unsigned AddrSpace) { +void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { assert(MAI.hasLEB128() && "Cannot print a .uleb"); OS << ".uleb128 " << *Value; EmitEOL(); } -void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, - unsigned AddrSpace) { +void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { assert(MAI.hasLEB128() && "Cannot print a .sleb"); OS << ".sleb128 " << *Value; EmitEOL(); @@ -423,7 +420,8 @@ void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); } -void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, +void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit) { // Some assemblers don't support non-power of two alignments, so we always @@ -532,7 +530,7 @@ void PTXMCAsmStreamer::Finish() {} namespace llvm { MCStreamer *createPTXAsmStreamer(MCContext &Context, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, + bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *IP, MCCodeEmitter *CE, TargetAsmBackend *TAB, bool ShowInst) { diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index b37c740..c5e1910 100644 --- 
a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -79,12 +79,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->argRegBegin(), e = MFI->argRegEnd(); - i != e; ++i) + i != e; ++i) dbgs() << "Arg Reg: " << *i << "\n";); DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); - i != e; ++i) + i != e; ++i) dbgs() << "Local Var Reg: " << *i << "\n";); return false; diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 56d044b..81df1c2 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -42,36 +42,37 @@ public: void setRetReg(unsigned reg) { reg_ret = reg; } void doneAddArg(void) { - std::sort(reg_arg.begin(), reg_arg.end()); _isDoneAddArg = true; } - void doneAddLocalVar(void) { - std::sort(reg_local_var.begin(), reg_local_var.end()); - } + void doneAddLocalVar(void) {} bool isDoneAddArg(void) { return _isDoneAddArg; } bool isKernel() const { return is_kernel; } - typedef std::vector<unsigned>::const_iterator reg_iterator; + typedef std::vector<unsigned>::const_iterator reg_iterator; + typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator; - bool argRegEmpty() const { return reg_arg.empty(); } - int getNumArg() const { return reg_arg.size(); } + bool argRegEmpty() const { return reg_arg.empty(); } + int getNumArg() const { return reg_arg.size(); } reg_iterator argRegBegin() const { return reg_arg.begin(); } reg_iterator argRegEnd() const { return reg_arg.end(); } + reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); } + reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); } - bool localVarRegEmpty() const { return reg_local_var.empty(); } + bool localVarRegEmpty() const { return reg_local_var.empty(); } reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } reg_iterator localVarRegEnd() const { return reg_local_var.end(); } unsigned retReg() const { return reg_ret; } bool isArgReg(unsigned reg) const { - return std::binary_search(reg_arg.begin(), reg_arg.end(), reg); + return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); } bool isLocalVarReg(unsigned reg) const { - return std::binary_search(reg_local_var.begin(), reg_local_var.end(), reg); + return std::find(reg_local_var.begin(), reg_local_var.end(), reg) + != reg_local_var.end(); } }; // class PTXMachineFunctionInfo } // namespace llvm diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index 22e2b34..f616141 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -19,6 +19,8 @@ class PTXReg<string n> : Register<n> { // Registers //===----------------------------------------------------------------------===// +///===- Predicate Registers -----------------------------------------------===// + def P0 : PTXReg<"p0">; def P1 : PTXReg<"p1">; def P2 : PTXReg<"p2">; @@ -51,6 +53,108 @@ def P28 : PTXReg<"p28">; def P29 : PTXReg<"p29">; def P30 : PTXReg<"p30">; def P31 : PTXReg<"p31">; +def P32 : PTXReg<"p32">; +def P33 : PTXReg<"p33">; +def P34 : PTXReg<"p34">; +def P35 : PTXReg<"p35">; +def P36 : PTXReg<"p36">; +def P37 : PTXReg<"p37">; +def P38 : PTXReg<"p38">; +def P39 : PTXReg<"p39">; +def P40 : PTXReg<"p40">; +def P41 : PTXReg<"p41">; +def P42 : PTXReg<"p42">; +def P43 : PTXReg<"p43">; +def P44 : PTXReg<"p44">; +def P45 : 
PTXReg<"p45">; +def P46 : PTXReg<"p46">; +def P47 : PTXReg<"p47">; +def P48 : PTXReg<"p48">; +def P49 : PTXReg<"p49">; +def P50 : PTXReg<"p50">; +def P51 : PTXReg<"p51">; +def P52 : PTXReg<"p52">; +def P53 : PTXReg<"p53">; +def P54 : PTXReg<"p54">; +def P55 : PTXReg<"p55">; +def P56 : PTXReg<"p56">; +def P57 : PTXReg<"p57">; +def P58 : PTXReg<"p58">; +def P59 : PTXReg<"p59">; +def P60 : PTXReg<"p60">; +def P61 : PTXReg<"p61">; +def P62 : PTXReg<"p62">; +def P63 : PTXReg<"p63">; + +///===- 16-bit Integer Registers ------------------------------------------===// + +def RH0 : PTXReg<"rh0">; +def RH1 : PTXReg<"rh1">; +def RH2 : PTXReg<"rh2">; +def RH3 : PTXReg<"rh3">; +def RH4 : PTXReg<"rh4">; +def RH5 : PTXReg<"rh5">; +def RH6 : PTXReg<"rh6">; +def RH7 : PTXReg<"rh7">; +def RH8 : PTXReg<"rh8">; +def RH9 : PTXReg<"rh9">; +def RH10 : PTXReg<"rh10">; +def RH11 : PTXReg<"rh11">; +def RH12 : PTXReg<"rh12">; +def RH13 : PTXReg<"rh13">; +def RH14 : PTXReg<"rh14">; +def RH15 : PTXReg<"rh15">; +def RH16 : PTXReg<"rh16">; +def RH17 : PTXReg<"rh17">; +def RH18 : PTXReg<"rh18">; +def RH19 : PTXReg<"rh19">; +def RH20 : PTXReg<"rh20">; +def RH21 : PTXReg<"rh21">; +def RH22 : PTXReg<"rh22">; +def RH23 : PTXReg<"rh23">; +def RH24 : PTXReg<"rh24">; +def RH25 : PTXReg<"rh25">; +def RH26 : PTXReg<"rh26">; +def RH27 : PTXReg<"rh27">; +def RH28 : PTXReg<"rh28">; +def RH29 : PTXReg<"rh29">; +def RH30 : PTXReg<"rh30">; +def RH31 : PTXReg<"rh31">; +def RH32 : PTXReg<"rh32">; +def RH33 : PTXReg<"rh33">; +def RH34 : PTXReg<"rh34">; +def RH35 : PTXReg<"rh35">; +def RH36 : PTXReg<"rh36">; +def RH37 : PTXReg<"rh37">; +def RH38 : PTXReg<"rh38">; +def RH39 : PTXReg<"rh39">; +def RH40 : PTXReg<"rh40">; +def RH41 : PTXReg<"rh41">; +def RH42 : PTXReg<"rh42">; +def RH43 : PTXReg<"rh43">; +def RH44 : PTXReg<"rh44">; +def RH45 : PTXReg<"rh45">; +def RH46 : PTXReg<"rh46">; +def RH47 : PTXReg<"rh47">; +def RH48 : PTXReg<"rh48">; +def RH49 : PTXReg<"rh49">; +def RH50 : PTXReg<"rh50">; +def RH51 : PTXReg<"rh51">; +def RH52 : PTXReg<"rh52">; +def RH53 : PTXReg<"rh53">; +def RH54 : PTXReg<"rh54">; +def RH55 : PTXReg<"rh55">; +def RH56 : PTXReg<"rh56">; +def RH57 : PTXReg<"rh57">; +def RH58 : PTXReg<"rh58">; +def RH59 : PTXReg<"rh59">; +def RH60 : PTXReg<"rh60">; +def RH61 : PTXReg<"rh61">; +def RH62 : PTXReg<"rh62">; +def RH63 : PTXReg<"rh63">; + + +///===- 32-bit Integer Registers ------------------------------------------===// def R0 : PTXReg<"r0">; def R1 : PTXReg<"r1">; @@ -84,6 +188,243 @@ def R28 : PTXReg<"r28">; def R29 : PTXReg<"r29">; def R30 : PTXReg<"r30">; def R31 : PTXReg<"r31">; +def R32 : PTXReg<"r32">; +def R33 : PTXReg<"r33">; +def R34 : PTXReg<"r34">; +def R35 : PTXReg<"r35">; +def R36 : PTXReg<"r36">; +def R37 : PTXReg<"r37">; +def R38 : PTXReg<"r38">; +def R39 : PTXReg<"r39">; +def R40 : PTXReg<"r40">; +def R41 : PTXReg<"r41">; +def R42 : PTXReg<"r42">; +def R43 : PTXReg<"r43">; +def R44 : PTXReg<"r44">; +def R45 : PTXReg<"r45">; +def R46 : PTXReg<"r46">; +def R47 : PTXReg<"r47">; +def R48 : PTXReg<"r48">; +def R49 : PTXReg<"r49">; +def R50 : PTXReg<"r50">; +def R51 : PTXReg<"r51">; +def R52 : PTXReg<"r52">; +def R53 : PTXReg<"r53">; +def R54 : PTXReg<"r54">; +def R55 : PTXReg<"r55">; +def R56 : PTXReg<"r56">; +def R57 : PTXReg<"r57">; +def R58 : PTXReg<"r58">; +def R59 : PTXReg<"r59">; +def R60 : PTXReg<"r60">; +def R61 : PTXReg<"r61">; +def R62 : PTXReg<"r62">; +def R63 : PTXReg<"r63">; + + +///===- 64-bit Integer Registers ------------------------------------------===// + +def RD0 : PTXReg<"rd0">; +def RD1 : 
PTXReg<"rd1">; +def RD2 : PTXReg<"rd2">; +def RD3 : PTXReg<"rd3">; +def RD4 : PTXReg<"rd4">; +def RD5 : PTXReg<"rd5">; +def RD6 : PTXReg<"rd6">; +def RD7 : PTXReg<"rd7">; +def RD8 : PTXReg<"rd8">; +def RD9 : PTXReg<"rd9">; +def RD10 : PTXReg<"rd10">; +def RD11 : PTXReg<"rd11">; +def RD12 : PTXReg<"rd12">; +def RD13 : PTXReg<"rd13">; +def RD14 : PTXReg<"rd14">; +def RD15 : PTXReg<"rd15">; +def RD16 : PTXReg<"rd16">; +def RD17 : PTXReg<"rd17">; +def RD18 : PTXReg<"rd18">; +def RD19 : PTXReg<"rd19">; +def RD20 : PTXReg<"rd20">; +def RD21 : PTXReg<"rd21">; +def RD22 : PTXReg<"rd22">; +def RD23 : PTXReg<"rd23">; +def RD24 : PTXReg<"rd24">; +def RD25 : PTXReg<"rd25">; +def RD26 : PTXReg<"rd26">; +def RD27 : PTXReg<"rd27">; +def RD28 : PTXReg<"rd28">; +def RD29 : PTXReg<"rd29">; +def RD30 : PTXReg<"rd30">; +def RD31 : PTXReg<"rd31">; +def RD32 : PTXReg<"rd32">; +def RD33 : PTXReg<"rd33">; +def RD34 : PTXReg<"rd34">; +def RD35 : PTXReg<"rd35">; +def RD36 : PTXReg<"rd36">; +def RD37 : PTXReg<"rd37">; +def RD38 : PTXReg<"rd38">; +def RD39 : PTXReg<"rd39">; +def RD40 : PTXReg<"rd40">; +def RD41 : PTXReg<"rd41">; +def RD42 : PTXReg<"rd42">; +def RD43 : PTXReg<"rd43">; +def RD44 : PTXReg<"rd44">; +def RD45 : PTXReg<"rd45">; +def RD46 : PTXReg<"rd46">; +def RD47 : PTXReg<"rd47">; +def RD48 : PTXReg<"rd48">; +def RD49 : PTXReg<"rd49">; +def RD50 : PTXReg<"rd50">; +def RD51 : PTXReg<"rd51">; +def RD52 : PTXReg<"rd52">; +def RD53 : PTXReg<"rd53">; +def RD54 : PTXReg<"rd54">; +def RD55 : PTXReg<"rd55">; +def RD56 : PTXReg<"rd56">; +def RD57 : PTXReg<"rd57">; +def RD58 : PTXReg<"rd58">; +def RD59 : PTXReg<"rd59">; +def RD60 : PTXReg<"rd60">; +def RD61 : PTXReg<"rd61">; +def RD62 : PTXReg<"rd62">; +def RD63 : PTXReg<"rd63">; + + +///===- 32-bit Floating-Point Registers -----------------------------------===// + +def F0 : PTXReg<"f0">; +def F1 : PTXReg<"f1">; +def F2 : PTXReg<"f2">; +def F3 : PTXReg<"f3">; +def F4 : PTXReg<"f4">; +def F5 : PTXReg<"f5">; +def F6 : PTXReg<"f6">; +def F7 : PTXReg<"f7">; +def F8 : PTXReg<"f8">; +def F9 : PTXReg<"f9">; +def F10 : PTXReg<"f10">; +def F11 : PTXReg<"f11">; +def F12 : PTXReg<"f12">; +def F13 : PTXReg<"f13">; +def F14 : PTXReg<"f14">; +def F15 : PTXReg<"f15">; +def F16 : PTXReg<"f16">; +def F17 : PTXReg<"f17">; +def F18 : PTXReg<"f18">; +def F19 : PTXReg<"f19">; +def F20 : PTXReg<"f20">; +def F21 : PTXReg<"f21">; +def F22 : PTXReg<"f22">; +def F23 : PTXReg<"f23">; +def F24 : PTXReg<"f24">; +def F25 : PTXReg<"f25">; +def F26 : PTXReg<"f26">; +def F27 : PTXReg<"f27">; +def F28 : PTXReg<"f28">; +def F29 : PTXReg<"f29">; +def F30 : PTXReg<"f30">; +def F31 : PTXReg<"f31">; +def F32 : PTXReg<"f32">; +def F33 : PTXReg<"f33">; +def F34 : PTXReg<"f34">; +def F35 : PTXReg<"f35">; +def F36 : PTXReg<"f36">; +def F37 : PTXReg<"f37">; +def F38 : PTXReg<"f38">; +def F39 : PTXReg<"f39">; +def F40 : PTXReg<"f40">; +def F41 : PTXReg<"f41">; +def F42 : PTXReg<"f42">; +def F43 : PTXReg<"f43">; +def F44 : PTXReg<"f44">; +def F45 : PTXReg<"f45">; +def F46 : PTXReg<"f46">; +def F47 : PTXReg<"f47">; +def F48 : PTXReg<"f48">; +def F49 : PTXReg<"f49">; +def F50 : PTXReg<"f50">; +def F51 : PTXReg<"f51">; +def F52 : PTXReg<"f52">; +def F53 : PTXReg<"f53">; +def F54 : PTXReg<"f54">; +def F55 : PTXReg<"f55">; +def F56 : PTXReg<"f56">; +def F57 : PTXReg<"f57">; +def F58 : PTXReg<"f58">; +def F59 : PTXReg<"f59">; +def F60 : PTXReg<"f60">; +def F61 : PTXReg<"f61">; +def F62 : PTXReg<"f62">; +def F63 : PTXReg<"f63">; + + +///===- 64-bit Floating-Point Registers -----------------------------------===// + 
+def FD0 : PTXReg<"fd0">;
+def FD1 : PTXReg<"fd1">;
+def FD2 : PTXReg<"fd2">;
+def FD3 : PTXReg<"fd3">;
+def FD4 : PTXReg<"fd4">;
+def FD5 : PTXReg<"fd5">;
+def FD6 : PTXReg<"fd6">;
+def FD7 : PTXReg<"fd7">;
+def FD8 : PTXReg<"fd8">;
+def FD9 : PTXReg<"fd9">;
+def FD10 : PTXReg<"fd10">;
+def FD11 : PTXReg<"fd11">;
+def FD12 : PTXReg<"fd12">;
+def FD13 : PTXReg<"fd13">;
+def FD14 : PTXReg<"fd14">;
+def FD15 : PTXReg<"fd15">;
+def FD16 : PTXReg<"fd16">;
+def FD17 : PTXReg<"fd17">;
+def FD18 : PTXReg<"fd18">;
+def FD19 : PTXReg<"fd19">;
+def FD20 : PTXReg<"fd20">;
+def FD21 : PTXReg<"fd21">;
+def FD22 : PTXReg<"fd22">;
+def FD23 : PTXReg<"fd23">;
+def FD24 : PTXReg<"fd24">;
+def FD25 : PTXReg<"fd25">;
+def FD26 : PTXReg<"fd26">;
+def FD27 : PTXReg<"fd27">;
+def FD28 : PTXReg<"fd28">;
+def FD29 : PTXReg<"fd29">;
+def FD30 : PTXReg<"fd30">;
+def FD31 : PTXReg<"fd31">;
+def FD32 : PTXReg<"fd32">;
+def FD33 : PTXReg<"fd33">;
+def FD34 : PTXReg<"fd34">;
+def FD35 : PTXReg<"fd35">;
+def FD36 : PTXReg<"fd36">;
+def FD37 : PTXReg<"fd37">;
+def FD38 : PTXReg<"fd38">;
+def FD39 : PTXReg<"fd39">;
+def FD40 : PTXReg<"fd40">;
+def FD41 : PTXReg<"fd41">;
+def FD42 : PTXReg<"fd42">;
+def FD43 : PTXReg<"fd43">;
+def FD44 : PTXReg<"fd44">;
+def FD45 : PTXReg<"fd45">;
+def FD46 : PTXReg<"fd46">;
+def FD47 : PTXReg<"fd47">;
+def FD48 : PTXReg<"fd48">;
+def FD49 : PTXReg<"fd49">;
+def FD50 : PTXReg<"fd50">;
+def FD51 : PTXReg<"fd51">;
+def FD52 : PTXReg<"fd52">;
+def FD53 : PTXReg<"fd53">;
+def FD54 : PTXReg<"fd54">;
+def FD55 : PTXReg<"fd55">;
+def FD56 : PTXReg<"fd56">;
+def FD57 : PTXReg<"fd57">;
+def FD58 : PTXReg<"fd58">;
+def FD59 : PTXReg<"fd59">;
+def FD60 : PTXReg<"fd60">;
+def FD61 : PTXReg<"fd61">;
+def FD62 : PTXReg<"fd62">;
+def FD63 : PTXReg<"fd63">;
+
 //===----------------------------------------------------------------------===//
 // Register classes
@@ -93,10 +434,58 @@ def Preds : RegisterClass<"PTX", [i1], 8,
                          [P0, P1, P2, P3, P4, P5, P6, P7,
                           P8, P9, P10, P11, P12, P13, P14, P15,
                           P16, P17, P18, P19, P20, P21, P22, P23,
-                          P24, P25, P26, P27, P28, P29, P30, P31]>;
+                          P24, P25, P26, P27, P28, P29, P30, P31,
+                          P32, P33, P34, P35, P36, P37, P38, P39,
+                          P40, P41, P42, P43, P44, P45, P46, P47,
+                          P48, P49, P50, P51, P52, P53, P54, P55,
+                          P56, P57, P58, P59, P60, P61, P62, P63]>;
 
-def RRegs32 : RegisterClass<"PTX", [i32], 32,
+def RRegu16 : RegisterClass<"PTX", [i16], 16,
+                            [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
+                             RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
+                             RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
+                             RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31,
+                             RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39,
+                             RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47,
+                             RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55,
+                             RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63]>;
+
+def RRegu32 : RegisterClass<"PTX", [i32], 32,
                             [R0, R1, R2, R3, R4, R5, R6, R7,
                              R8, R9, R10, R11, R12, R13, R14, R15,
                              R16, R17, R18, R19, R20, R21, R22, R23,
-                             R24, R25, R26, R27, R28, R29, R30, R31]>;
+                             R24, R25, R26, R27, R28, R29, R30, R31,
+                             R32, R33, R34, R35, R36, R37, R38, R39,
+                             R40, R41, R42, R43, R44, R45, R46, R47,
+                             R48, R49, R50, R51, R52, R53, R54, R55,
+                             R56, R57, R58, R59, R60, R61, R62, R63]>;
+
+def RRegu64 : RegisterClass<"PTX", [i64], 64,
+                            [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
+                             RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
+                             RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
+                             RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31,
+                             RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39,
+                             RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47,
+
RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, + RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63]>; + +def RRegf32 : RegisterClass<"PTX", [f32], 32, + [F0, F1, F2, F3, F4, F5, F6, F7, + F8, F9, F10, F11, F12, F13, F14, F15, + F16, F17, F18, F19, F20, F21, F22, F23, + F24, F25, F26, F27, F28, F29, F30, F31, + F32, F33, F34, F35, F36, F37, F38, F39, + F40, F41, F42, F43, F44, F45, F46, F47, + F48, F49, F50, F51, F52, F53, F54, F55, + F56, F57, F58, F59, F60, F61, F62, F63]>; + +def RRegf64 : RegisterClass<"PTX", [f64], 64, + [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7, + FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15, + FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23, + FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31, + FD32, FD33, FD34, FD35, FD36, FD37, FD38, FD39, + FD40, FD41, FD42, FD43, FD44, FD45, FD46, FD47, + FD48, FD49, FD50, FD51, FD52, FD53, FD54, FD55, + FD56, FD57, FD58, FD59, FD60, FD61, FD62, FD63]>; diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp index 00e2c88..a224f2b 100644 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -12,12 +12,36 @@ //===----------------------------------------------------------------------===// #include "PTXSubtarget.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) { - std::string TARGET = "sm_20"; - // TODO: call ParseSubtargetFeatures(FS, TARGET); +PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS, + bool is64Bit) + : PTXShaderModel(PTX_SM_1_0), + PTXVersion(PTX_VERSION_2_0), + SupportsDouble(false), + Is64Bit(is64Bit) { + std::string TARGET = "generic"; + ParseSubtargetFeatures(FS, TARGET); +} + +std::string PTXSubtarget::getTargetString() const { + switch(PTXShaderModel) { + default: llvm_unreachable("Unknown shader model"); + case PTX_SM_1_0: return "sm_10"; + case PTX_SM_1_3: return "sm_13"; + case PTX_SM_2_0: return "sm_20"; + } +} + +std::string PTXSubtarget::getPTXVersionString() const { + switch(PTXVersion) { + default: llvm_unreachable("Unknown PTX version"); + case PTX_VERSION_2_0: return "2.0"; + case PTX_VERSION_2_1: return "2.1"; + case PTX_VERSION_2_2: return "2.2"; + } } #include "PTXGenSubtarget.inc" diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h index 7fd85f8..47d9842 100644 --- a/lib/Target/PTX/PTXSubtarget.h +++ b/lib/Target/PTX/PTXSubtarget.h @@ -19,10 +19,57 @@ namespace llvm { class PTXSubtarget : public TargetSubtarget { private: - bool is_sm20; + + /** + * Enumeration of Shader Models supported by the back-end. + */ + enum PTXShaderModelEnum { + PTX_SM_1_0, /*< Shader Model 1.0 */ + PTX_SM_1_3, /*< Shader Model 1.3 */ + PTX_SM_2_0 /*< Shader Model 2.0 */ + }; + + /** + * Enumeration of PTX versions supported by the back-end. + * + * Currently, PTX 2.0 is the minimum supported version. + */ + enum PTXVersionEnum { + PTX_VERSION_2_0, /*< PTX Version 2.0 */ + PTX_VERSION_2_1, /*< PTX Version 2.1 */ + PTX_VERSION_2_2 /*< PTX Version 2.2 */ + }; + + /// Shader Model supported on the target GPU. + PTXShaderModelEnum PTXShaderModel; + + /// PTX Language Version. + PTXVersionEnum PTXVersion; + + // The native .f64 type is supported on the hardware. + bool SupportsDouble; + + // Use .u64 instead of .u32 for addresses. 
+ bool Is64Bit; public: - PTXSubtarget(const std::string &TT, const std::string &FS); + PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit); + + std::string getTargetString() const; + + std::string getPTXVersionString() const; + + bool supportsDouble() const { return SupportsDouble; } + + bool is64Bit() const { return Is64Bit; } + + bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; } + + bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; } + + bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } + + bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; } std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index b263813..1b737c9 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -16,12 +16,14 @@ #include "PTXTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace llvm { MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, + bool useCFI, MCInstPrinter *InstPrint, MCCodeEmitter *CE, TargetAsmBackend *TAB, @@ -29,21 +31,47 @@ namespace llvm { } extern "C" void LLVMInitializePTXTarget() { - RegisterTargetMachine<PTXTargetMachine> X(ThePTXTarget); - RegisterAsmInfo<PTXMCAsmInfo> Y(ThePTXTarget); - TargetRegistry::RegisterAsmStreamer(ThePTXTarget, createPTXAsmStreamer); + + RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target); + RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target); + + RegisterAsmInfo<PTXMCAsmInfo> Z(ThePTX32Target); + RegisterAsmInfo<PTXMCAsmInfo> W(ThePTX64Target); + + TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer); + TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer); +} + +namespace { + const char* DataLayout32 = + "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; + const char* DataLayout64 = + "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; } // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) + const std::string &FS, + bool is64Bit) : LLVMTargetMachine(T, TT), - DataLayout("e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"), + DataLayout(is64Bit ? 
DataLayout64 : DataLayout32),
+    Subtarget(TT, FS, is64Bit),
     FrameLowering(Subtarget),
     InstrInfo(*this),
-    TLInfo(*this),
-    Subtarget(TT, FS) {
+    TLInfo(*this) {
+}
+
+PTX32TargetMachine::PTX32TargetMachine(const Target &T,
+                                       const std::string& TT,
+                                       const std::string& FS)
+  : PTXTargetMachine(T, TT, FS, false) {
+}
+
+PTX64TargetMachine::PTX64TargetMachine(const Target &T,
+                                       const std::string& TT,
+                                       const std::string& FS)
+  : PTXTargetMachine(T, TT, FS, true) {
 }
 
 bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 728e36f..149be8e 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -25,15 +25,15 @@ namespace llvm {
 class PTXTargetMachine : public LLVMTargetMachine {
   private:
-    const TargetData DataLayout;
-    PTXFrameLowering FrameLowering;
-    PTXInstrInfo InstrInfo;
+    const TargetData  DataLayout;
+    PTXSubtarget      Subtarget; // has to be initialized before FrameLowering
+    PTXFrameLowering  FrameLowering;
+    PTXInstrInfo      InstrInfo;
     PTXTargetLowering TLInfo;
-    PTXSubtarget Subtarget;
 
   public:
     PTXTargetMachine(const Target &T, const std::string &TT,
-                     const std::string &FS);
+                     const std::string &FS, bool is64Bit);
 
     virtual const TargetData *getTargetData() const { return &DataLayout; }
 
@@ -55,6 +55,22 @@ class PTXTargetMachine : public LLVMTargetMachine {
     virtual bool addPostRegAlloc(PassManagerBase &PM,
                                  CodeGenOpt::Level OptLevel);
 }; // class PTXTargetMachine
+
+
+class PTX32TargetMachine : public PTXTargetMachine {
+public:
+
+  PTX32TargetMachine(const Target &T, const std::string &TT,
+                     const std::string& FS);
+}; // class PTX32TargetMachine
+
+class PTX64TargetMachine : public PTXTargetMachine {
+public:
+
+  PTX64TargetMachine(const Target &T, const std::string &TT,
+                     const std::string& FS);
+}; // class PTX64TargetMachine
+
 } // namespace llvm
 
 #endif // PTX_TARGET_MACHINE_H
diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
index a577d77..9df6c75 100644
--- a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
+++ b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -13,9 +13,13 @@
 
 using namespace llvm;
 
-Target llvm::ThePTXTarget;
+Target llvm::ThePTX32Target;
+Target llvm::ThePTX64Target;
 
 extern "C" void LLVMInitializePTXTargetInfo() {
   // see llvm/ADT/Triple.h
-  RegisterTarget<Triple::ptx> X(ThePTXTarget, "ptx", "PTX");
+  RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
+                                    "PTX (32-bit) [Experimental]");
+  RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
+                                    "PTX (64-bit) [Experimental]");
 }
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index ebc10da..9cf9db9 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -17,13 +17,16 @@
 #include "llvm/MC/MCInstPrinter.h"
 
 namespace llvm {
-  class MCOperand;
+
+class MCOperand;
+class TargetMachine;
 
 class PPCInstPrinter : public MCInstPrinter {
   // 0 -> AIX, 1 -> Darwin.
unsigned SyntaxVariant; public: - PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant) + PPCInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI, + unsigned syntaxVariant) : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {} bool isDarwinSyntax() const { diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp index c4d4ac9..f562a3f 100644 --- a/lib/Target/PowerPC/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/PPCAsmBackend.cpp @@ -110,10 +110,8 @@ namespace { TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); - default: - return 0; - } + + return 0; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8ed5d7f..09a9be9 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -680,9 +680,10 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, } static MCInstPrinter *createPPCMCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { - return new PPCInstPrinter(MAI, SyntaxVariant); + return new PPCInstPrinter(TM, MAI, SyntaxVariant); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 70d00e4..128522c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -899,7 +899,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + CN->getValueType(0)); return true; } @@ -947,7 +948,8 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPC::R0, N.getValueType()); + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + N.getValueType()); Index = N; return true; } @@ -2153,7 +2155,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, } /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be -/// adjusted to accomodate the arguments for the tailcall. +/// adjusted to accommodate the arguments for the tailcall. static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, unsigned ParamSize) { @@ -2394,7 +2396,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, // Emit a sequence of copyto/copyfrom virtual registers for arguments that // might overwrite each other in case of tail call optimization. SmallVector<SDValue, 8> MemOpChains2; - // Do not flag preceeding copytoreg stuff together with the following stuff. + // Do not flag preceding copytoreg stuff together with the following stuff. 
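  // (Editorial sketch, not part of the patch: resetting InFlag to a
  // default-constructed SDValue below drops the glue operand, so the
  // CopyToReg nodes built above are not glued to the argument stores
  // that follow; a live glue value would force the scheduler to keep
  // them adjacent, which is exactly what the comment above rules out.)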
InFlag = SDValue(); StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, MemOpChains2, dl); @@ -2442,7 +2444,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { unsigned OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - PPCSubTarget.getDarwinVers() < 9 && + (!PPCSubTarget.getTargetTriple().isMacOSX() || + PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && (G->getGlobal()->isDeclaration() || G->getGlobal()->isWeakForLinker())) { // PC-relative references to external symbols should go through $stub, @@ -2465,7 +2468,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned char OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - PPCSubTarget.getDarwinVers() < 9) { + (!PPCSubTarget.getTargetTriple().isMacOSX() || + PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -4571,6 +4575,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = PPCSubTarget.isPPC64(); + unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -4634,8 +4639,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // bne- loopMBB // fallthrough --> exitMBB // srw dest, tmpDest, shift - - if (ptrA!=PPC::R0) { + if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); @@ -4665,7 +4669,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, BB = loopMBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) - .addReg(PPC::R0).addReg(PtrReg); + .addReg(ZeroReg).addReg(PtrReg); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) .addReg(Incr2Reg).addReg(TmpDestReg); @@ -4676,7 +4680,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) - .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg); + .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); @@ -4685,7 +4689,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // exitMBB: // ... BB = exitMBB; - BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg); + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) + .addReg(ShiftReg); return BB; } @@ -4933,6 +4938,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); + unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; // thisMBB: // ... // fallthrough --> loopMBB @@ -4965,7 +4971,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // stwcx. tmpDest, ptr // exitBB: // srw dest, tmpDest, shift - if (ptrA!=PPC::R0) { + if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); @@ -5002,7 +5008,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = loop1MBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) - .addReg(PPC::R0).addReg(PtrReg); + .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) @@ -5018,7 +5024,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) .addReg(Tmp2Reg).addReg(NewVal3Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) - .addReg(PPC::R0).addReg(PtrReg); + .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); @@ -5027,13 +5033,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = midMBB; BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) - .addReg(PPC::R0).addReg(PtrReg); + .addReg(ZeroReg).addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; - BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg); + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) + .addReg(ShiftReg); } else { llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 6636b69..9f0fae5 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -130,7 +130,7 @@ def : Pat<(PPCnop), // Atomic operations let usesCustomInserter = 1 in { - let Uses = [CR0] in { + let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 82aadeb..24071b7 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -550,7 +550,7 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), // Atomic operations let usesCustomInserter = 1 in { - let Uses = [CR0] in { + let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp index d1178dd..9e508cc 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp @@ -17,7 +17,7 @@ using namespace llvm; PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { PCSymbol = "."; CommentString = ";"; - ExceptionsType = ExceptionHandling::DwarfTable; + ExceptionsType = ExceptionHandling::DwarfCFI; if (!is64Bit) Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 72a1dee..5f3aa23 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -70,7 +70,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, , HasSTFIWX(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) - , DarwinVers(0) { + , TargetTriple(TT) { // Determine default and user specified characteristics std::string CPU = "generic"; @@ -92,19 +92,6 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, // support it, ignore. 
if (use64BitRegs() && !has64BitSupport()) Use64BitRegs = false; - - // Set the boolean corresponding to the current target triple, or the default - // if one cannot be determined, to true. - if (TT.length() > 7) { - // Determine which version of darwin this is. - size_t DarwinPos = TT.find("-darwin"); - if (DarwinPos != std::string::npos) { - if (isdigit(TT[DarwinPos+7])) - DarwinVers = atoi(&TT[DarwinPos+7]); - else - DarwinVers = 8; // Minimum supported darwin is Tiger. - } - } // Set up darwin-specific properties. if (isDarwin()) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 00ec747..8fd1a44 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -14,6 +14,7 @@ #ifndef POWERPCSUBTARGET_H #define POWERPCSUBTARGET_H +#include "llvm/ADT/Triple.h" #include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetSubtarget.h" @@ -65,9 +66,9 @@ protected: bool HasLazyResolverStubs; bool IsJITCodeModel; - /// DarwinVers - Nonzero if this is a darwin platform. Otherwise, the numeric - /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. - unsigned char DarwinVers; // Is any darwin-ppc platform. + /// TargetTriple - What processor and OS we're targeting. + Triple TargetTriple; + public: /// This constructor initializes the data members to match that /// of the specified triple. @@ -134,13 +135,10 @@ public: bool hasAltivec() const { return HasAltivec; } bool isGigaProcessor() const { return IsGigaProcessor; } - /// isDarwin - True if this is any darwin platform. - bool isDarwin() const { return DarwinVers != 0; } - /// isDarwin - True if this is darwin9 (leopard, 10.5) or above. - bool isDarwin9() const { return DarwinVers >= 9; } + const Triple &getTargetTriple() const { return TargetTriple; } - /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. - unsigned getDarwinVers() const { return DarwinVers; } + /// isDarwin - True if this is any darwin platform. + bool isDarwin() const { return TargetTriple.isMacOSX(); } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 212b450..d27e54e 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -24,7 +24,7 @@ using namespace llvm; static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; - if (TheTriple.getOS() == Triple::Darwin) + if (TheTriple.isOSDarwin()) return new PPCMCAsmInfoDarwin(isPPC64); return new PPCLinuxMCAsmInfo(isPPC64); @@ -37,12 +37,10 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + if (Triple(TT).isOSDarwin()) return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - default: - return NULL; - } + + return NULL; } extern "C" void LLVMInitializePowerPCTarget() { diff --git a/lib/Target/README.txt b/lib/Target/README.txt index f85914b..ffe3fa4 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -392,34 +392,6 @@ PHI Slicing could be extended to do this. //===---------------------------------------------------------------------===// -LSR should know what GPR types a target has from TargetData. 
This code: - -volatile short X, Y; // globals - -void foo(int N) { - int i; - for (i = 0; i < N; i++) { X = i; Y = i*4; } -} - -produces two near identical IV's (after promotion) on PPC/ARM: - -LBB1_2: - ldr r3, LCPI1_0 - ldr r3, [r3] - strh r2, [r3] - ldr r3, LCPI1_1 - ldr r3, [r3] - strh r1, [r3] - add r1, r1, #4 - add r2, r2, #1 <- [0,+,1] - sub r0, r0, #1 <- [0,-,1] - cmp r0, #0 - bne LBB1_2 - -LSR should reuse the "+" IV for the exit test. - -//===---------------------------------------------------------------------===// - Tail call elim should be more aggressive, checking to see if the call is followed by an uncond branch to an exit block. @@ -1325,6 +1297,21 @@ codegen. //===---------------------------------------------------------------------===// +simplifylibcalls should turn these snprintf idioms into memcpy (GCC PR47917) + +char buf1[6], buf2[6], buf3[4], buf4[4]; +int i; + +int foo (void) { + int ret = snprintf (buf1, sizeof buf1, "abcde"); + ret += snprintf (buf2, sizeof buf2, "abcdef") * 16; + ret += snprintf (buf3, sizeof buf3, "%s", i++ < 6 ? "abc" : "def") * 256; + ret += snprintf (buf4, sizeof buf4, "%s", i++ > 10 ? "abcde" : "defgh")*4096; + return ret; +} + +//===---------------------------------------------------------------------===// + "gas" uses this idiom: else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string)) .. @@ -1780,43 +1767,6 @@ case it choses instead to keep the max operation obvious. //===---------------------------------------------------------------------===// -Take the following testcase on x86-64 (similar testcases exist for all targets -with addc/adde): - -define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, -i64 %c) nounwind { -entry: - %0 = zext i64 %a to i128 ; <i128> [#uses=1] - %1 = zext i64 %b to i128 ; <i128> [#uses=1] - %2 = add i128 %1, %0 ; <i128> [#uses=2] - %3 = zext i64 %c to i128 ; <i128> [#uses=1] - %4 = shl i128 %3, 64 ; <i128> [#uses=1] - %5 = add i128 %4, %2 ; <i128> [#uses=1] - %6 = lshr i128 %5, 64 ; <i128> [#uses=1] - %7 = trunc i128 %6 to i64 ; <i64> [#uses=1] - store i64 %7, i64* %s, align 8 - %8 = trunc i128 %2 to i64 ; <i64> [#uses=1] - store i64 %8, i64* %t, align 8 - ret void -} - -Generated code: - addq %rcx, %rdx - sbbq %rax, %rax - subq %rax, %r8 - movq %r8, (%rdi) - movq %rdx, (%rsi) - ret - -Expected code: - addq %rcx, %rdx - adcq $0, %r8 - movq %r8, (%rdi) - movq %rdx, (%rsi) - ret - -//===---------------------------------------------------------------------===// - Switch lowering generates less than ideal code for the following switch: define void @a(i32 %x) nounwind { entry: @@ -2124,11 +2074,12 @@ for.end: ; preds = %entry } This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold -the two memset's together. The issue with %n seems to stem from poor handling -of the original loop. +the two memset's together. -To simplify this, we need SCEV to know that "n != 0" because of the dominating -conditional. That would turn the second memset into a simple memset of 'n'. +The issue with the addition only occurs in 64-bit mode, and appears to be at +least partially caused by Scalar Evolution not keeping its cache updated: it +returns the "wrong" result immediately after indvars runs, but figures out the +expected result if it is run from scratch on IR resulting from running indvars. 
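+
+(Illustrative only — a minimal C sketch of the shape of code this entry is
+about: two zeroing loops over adjacent ranges that loop idiom recognition
+can turn into back-to-back memsets. The function and names below are made
+up for illustration, not the original testcase.)
+
+void zero2(char *p, unsigned long n) {
+  unsigned long i;
+  for (i = 0; i != n; ++i)   /* can be lowered to memset(p, 0, n)     */
+    p[i] = 0;
+  for (i = 0; i != n; ++i)   /* can be lowered to memset(p + n, 0, n) */
+    p[n + i] = 0;
+}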
//===---------------------------------------------------------------------===// @@ -2287,4 +2238,71 @@ missed cases: //===---------------------------------------------------------------------===// +define i1 @test1(i32 %x) nounwind { + %and = and i32 %x, 3 + %cmp = icmp ult i32 %and, 2 + ret i1 %cmp +} + +Can be folded to (x & 2) == 0. + +define i1 @test2(i32 %x) nounwind { + %and = and i32 %x, 3 + %cmp = icmp ugt i32 %and, 1 + ret i1 %cmp +} + +Can be folded to (x & 2) != 0. + +SimplifyDemandedBits shrinks the "and" constant to 2 but instcombine misses the +icmp transform. + +//===---------------------------------------------------------------------===// + +This code: + +typedef struct { +int f1:1; +int f2:1; +int f3:1; +int f4:29; +} t1; + +typedef struct { +int f1:1; +int f2:1; +int f3:30; +} t2; + +t1 s1; +t2 s2; + +void func1(void) +{ +s1.f1 = s2.f1; +s1.f2 = s2.f2; +} + +Compiles into this IR (on x86-64 at least): + +%struct.t1 = type { i8, [3 x i8] } +@s2 = global %struct.t1 zeroinitializer, align 4 +@s1 = global %struct.t1 zeroinitializer, align 4 +define void @func1() nounwind ssp noredzone { +entry: + %0 = load i32* bitcast (%struct.t1* @s2 to i32*), align 4 + %bf.val.sext5 = and i32 %0, 1 + %1 = load i32* bitcast (%struct.t1* @s1 to i32*), align 4 + %2 = and i32 %1, -4 + %3 = or i32 %2, %bf.val.sext5 + %bf.val.sext26 = and i32 %0, 2 + %4 = or i32 %3, %bf.val.sext26 + store i32 %4, i32* bitcast (%struct.t1* @s1 to i32*), align 4 + ret void +} + +The two or/and's should be merged into one each. + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 70574c3..edb62fa 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -544,7 +544,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 3cf95b5..e0a9de8 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -211,7 +211,7 @@ const std::string & SubtargetFeatures::getCPU() const { /// feature, set it. /// static -void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, +void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry, const SubtargetFeatureKV *FeatureTable, size_t FeatureTableSize) { for (size_t i = 0; i < FeatureTableSize; ++i) { @@ -230,7 +230,7 @@ void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, /// feature, clear it. /// static -void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, +void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry, const SubtargetFeatureKV *FeatureTable, size_t FeatureTableSize) { for (size_t i = 0; i < FeatureTableSize; ++i) { @@ -247,7 +247,7 @@ void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, /// getBits - Get feature bits. 
/// -uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, +uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize, const SubtargetFeatureKV *FeatureTable, size_t FeatureTableSize) { @@ -263,7 +263,7 @@ uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, "CPU features table is not sorted"); } #endif - uint32_t Bits = 0; // Resulting bits + uint64_t Bits = 0; // Resulting bits // Check if help is needed if (Features[0] == "help") diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 90939c3..d331614 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -451,7 +451,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag in - // necessary since all emited instructions must be stuck together. + // necessary since all emitted instructions must be stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index c628df0..1990bc7 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -617,10 +617,14 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const { const Type *ElemType = GV->getType()->getElementType(); unsigned Alignment = getPrefTypeAlignment(ElemType); - if (GV->getAlignment() > Alignment) - Alignment = GV->getAlignment(); + unsigned GVAlignment = GV->getAlignment(); + if (GVAlignment >= Alignment) { + Alignment = GVAlignment; + } else if (GVAlignment != 0) { + Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType)); + } - if (GV->hasInitializer()) { + if (GV->hasInitializer() && GVAlignment == 0) { if (Alignment < 16) { // If the global is not external, see if it is large. If so, give it a // larger alignment. diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 97f3bf6..d4b7697 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -149,10 +149,10 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { /// Measure the specified inline asm to determine an approximation of its /// length. -/// Comments (which run till the next SeparatorChar or newline) do not +/// Comments (which run till the next SeparatorString or newline) do not /// count as an instruction. /// Any other non-whitespace text is considered an instruction, with -/// multiple instructions separated by SeparatorChar or newlines. +/// multiple instructions separated by SeparatorString or newlines. /// Variable-length instructions are not handled here; this function /// may be overloaded in the target code to do that. 
unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, @@ -163,7 +163,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, bool atInsnStart = true; unsigned Length = 0; for (; *Str; ++Str) { - if (*Str == '\n' || *Str == MAI.getSeparatorChar()) + if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0) atInsnStart = true; if (atInsnStart && !std::isspace(*Str)) { Length += MAI.getMaxInstLength(); diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index c8bed18..e336b09 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -28,9 +28,22 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) { // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later. - if (T.getOS() != Triple::Darwin || T.getDarwinMajorNumber() < 9) + if (T.isMacOSX()) { + if (T.isMacOSXVersionLT(10, 5)) + TLI.setUnavailable(LibFunc::memset_pattern16); + } else if (T.getOS() == Triple::IOS) { + if (T.isOSVersionLT(3, 0)) + TLI.setUnavailable(LibFunc::memset_pattern16); + } else { TLI.setUnavailable(LibFunc::memset_pattern16); - + } + + // iprintf and friends are only available on XCore. + if (T.getArch() != Triple::xcore) { + TLI.setUnavailable(LibFunc::iprintf); + TLI.setUnavailable(LibFunc::siprintf); + TLI.setUnavailable(LibFunc::fiprintf); + } } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 5d34c7d..717ad41 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -120,6 +120,18 @@ static bool IsNullTerminatedString(const Constant *C) { return false; } +MCSymbol *TargetLoweringObjectFile:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + return Mang->getSymbol(GV); +} + +void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const { +} + + /// getKindForGlobal - This is a top-level target-independent classifier for /// a global variable. 
Given an global variable and information from TM, it /// classifies the global in a variety of ways that make various target @@ -305,16 +317,15 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const { const MCSymbol *Sym = Mang->getSymbol(GV); - return getExprForDwarfReference(Sym, Mang, MMI, Encoding, Streamer); + return getExprForDwarfReference(Sym, Encoding, Streamer); } const MCExpr *TargetLoweringObjectFile:: -getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, +getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext()); - switch (Encoding & 0xF0) { + switch (Encoding & 0x70) { default: report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: @@ -339,7 +350,7 @@ unsigned TargetLoweringObjectFile::getLSDAEncoding() const { return dwarf::DW_EH_PE_absptr; } -unsigned TargetLoweringObjectFile::getFDEEncoding() const { +unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const { return dwarf::DW_EH_PE_absptr; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index d579d95..76ccc09 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -48,6 +48,7 @@ namespace llvm { bool RealignStack; bool DisableJumpTables; bool StrongPHIElim; + bool HasDivModLibcall; bool AsmVerbosityDefault(false); } @@ -205,6 +206,10 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", cl::desc("Use strong PHI elimination."), cl::location(StrongPHIElim), cl::init(false)); +static cl::opt<std::string> +TrapFuncName("trap-func", cl::Hidden, + cl::desc("Emit a call to trap function rather than a trap instruction"), + cl::init("")); static cl::opt<bool> DataSections("fdata-sections", cl::desc("Emit data into separate sections"), @@ -221,7 +226,9 @@ TargetMachine::TargetMachine(const Target &T) : TheTarget(T), AsmInfo(0), MCRelaxAll(false), MCNoExecStack(false), - MCUseLoc(true) { + MCSaveTempLabels(false), + MCUseLoc(true), + MCUseCFI(true) { // Typically it will be subtargets that will adjust FloatABIType from Default // to Soft or Hard. if (UseSoftFloat) @@ -303,4 +310,11 @@ namespace llvm { bool HonorSignDependentRoundingFPMath() { return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; } + + /// getTrapFunctionName - If this returns a non-empty string, this means isel + /// should lower Intrinsic::trap to a call to the specified function name + /// instead of an ISD::TRAP node. + StringRef getTrapFunctionName() { + return TrapFuncName; + } } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 8fe549b..c352bfc 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -53,6 +53,14 @@ private: SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) + /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. + bool isSrcOp(X86Operand &Op); + + /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode + /// or %es:(%edi) in 32bit mode. + bool isDstOp(X86Operand &Op); + /// @name Auto-generated Matcher Functions /// { @@ -356,6 +364,24 @@ struct X86Operand : public MCParsedAsmOperand { } // end anonymous namespace. +bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { + unsigned basereg = Is64Bit ? 
X86::RSI : X86::ESI; + + return (Op.isMem() && + (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && + isa<MCConstantExpr>(Op.Mem.Disp) && + cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); +} + +bool X86ATTAsmParser::isDstOp(X86Operand &Op) { + unsigned basereg = Is64Bit ? X86::RDI : X86::EDI; + + return Op.isMem() && Op.Mem.SegReg == X86::ES && + isa<MCConstantExpr>(Op.Mem.Disp) && + cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; +} bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { @@ -788,7 +814,106 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, delete &Op; } } - + // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]" + if (Name.startswith("ins") && Operands.size() == 3 && + (Name == "insb" || Name == "insw" || Name == "insl")) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + + // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]" + if (Name.startswith("outs") && Operands.size() == 3 && + (Name == "outsb" || Name == "outsw" || Name == "outsl")) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + + // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" + if (Name.startswith("movs") && Operands.size() == 3 && + (Name == "movsb" || Name == "movsw" || Name == "movsl" || + (Is64Bit && Name == "movsq"))) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (isSrcOp(Op) && isDstOp(Op2)) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" + if (Name.startswith("lods") && Operands.size() == 3 && + (Name == "lods" || Name == "lodsb" || Name == "lodsw" || + Name == "lodsl" || (Is64Bit && Name == "lodsq"))) { + X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); + X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); + if (isSrcOp(*Op1) && Op2->isReg()) { + const char *ins; + unsigned reg = Op2->getReg(); + bool isLods = Name == "lods"; + if (reg == X86::AL && (isLods || Name == "lodsb")) + ins = "lodsb"; + else if (reg == X86::AX && (isLods || Name == "lodsw")) + ins = "lodsw"; + else if (reg == X86::EAX && (isLods || Name == "lodsl")) + ins = "lodsl"; + else if (reg == X86::RAX && (isLods || Name == "lodsq")) + ins = "lodsq"; + else + ins = NULL; + if (ins != NULL) { + Operands.pop_back(); + Operands.pop_back(); + delete Op1; + delete Op2; + if (Name != ins) + static_cast<X86Operand*>(Operands[0])->setTokenValue(ins); + } + } + } + // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" + if (Name.startswith("stos") && Operands.size() == 3 && + (Name == "stos" || Name == "stosb" || Name == "stosw" || + Name == "stosl" || (Is64Bit && Name == "stosq"))) { + X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); + X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); + if (isDstOp(*Op2) && Op1->isReg()) { + const char *ins; + unsigned reg = Op1->getReg(); + bool isStos = Name == "stos"; + if (reg == X86::AL && (isStos || 
Name == "stosb")) + ins = "stosb"; + else if (reg == X86::AX && (isStos || Name == "stosw")) + ins = "stosw"; + else if (reg == X86::EAX && (isStos || Name == "stosl")) + ins = "stosl"; + else if (reg == X86::RAX && (isStos || Name == "stosq")) + ins = "stosq"; + else + ins = NULL; + if (ins != NULL) { + Operands.pop_back(); + Operands.pop_back(); + delete Op1; + delete Op2; + if (Name != ins) + static_cast<X86Operand*>(Operands[0])->setTokenValue(ins); + } + } + } + // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to // "shift <op>". if ((Name.startswith("shr") || Name.startswith("sar") || @@ -803,6 +928,18 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 1); } } + + // Transforms "int $3" into "int3" as a size optimization. We can't write an + // instalias with an immediate operand yet. + if (Name == "int" && Operands.size() == 2) { + X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); + if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && + cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) { + delete Operands[1]; + Operands.erase(Operands.begin() + 1); + static_cast<X86Operand*>(Operands[0])->setTokenValue("int3"); + } + } return false; } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index f777756..d8a105e 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -409,6 +409,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_XMM32: case TYPE_XMM64: case TYPE_XMM128: + case TYPE_XMM256: case TYPE_DEBUGREG: case TYPE_CONTROLREG: return translateRMRegister(mcInst, insn); @@ -418,6 +419,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_M32: case TYPE_M64: case TYPE_M128: + case TYPE_M256: case TYPE_M512: case TYPE_Mv: case TYPE_M32FP: @@ -500,6 +502,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_Rv: translateRegister(mcInst, insn.opcodeRegister); return false; + case ENCODING_VVVV: + translateRegister(mcInst, insn.vvvv); + return false; case ENCODING_DUP: return translateOperand(mcInst, insn.spec->operands[operand.type - TYPE_DUP0], diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index b6546fc..de1610b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -75,6 +75,12 @@ static int modRMRequired(OpcodeType type, case THREEBYTE_3A: decision = &THREEBYTE3A_SYM; break; + case THREEBYTE_A6: + decision = &THREEBYTEA6_SYM; + break; + case THREEBYTE_A7: + decision = &THREEBYTEA7_SYM; + break; } return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 
@@ -115,6 +121,12 @@ static InstrUID decode(OpcodeType type, case THREEBYTE_3A: dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; + case THREEBYTE_A6: + dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; + case THREEBYTE_A7: + dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; } switch (dec->modrm_type) { @@ -368,29 +380,109 @@ static int readPrefixes(struct InternalInstruction* insn) { if (isPrefix) dbgprintf(insn, "Found prefix 0x%hhx", byte); } + + insn->vexSize = 0; - if (insn->mode == MODE_64BIT) { - if ((byte & 0xf0) == 0x40) { - uint8_t opcodeByte; + if (byte == 0xc4) { + uint8_t byte1; - if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { - dbgprintf(insn, "Redundant REX prefix"); - return -1; + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of VEX"); + return -1; + } + + if (insn->mode == MODE_64BIT || byte1 & 0x8) { + insn->vexSize = 3; + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + + if (insn->vexSize == 3) { + insn->vexPrefix[0] = byte; + consumeByte(insn, &insn->vexPrefix[1]); + consumeByte(insn, &insn->vexPrefix[2]); + + /* We simulate the REX prefix for simplicity's sake */ + + insn->rexPrefix = 0x40 + | (wFromVEX3of3(insn->vexPrefix[2]) << 3) + | (rFromVEX2of3(insn->vexPrefix[1]) << 2) + | (xFromVEX2of3(insn->vexPrefix[1]) << 1) + | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + + switch (ppFromVEX3of3(insn->vexPrefix[2])) + { + default: + break; + case VEX_PREFIX_66: + hasOpSize = TRUE; + break; } + + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); + } + } + else if (byte == 0xc5) { + uint8_t byte1; + + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of VEX"); + return -1; + } - insn->rexPrefix = byte; - insn->necessaryPrefixLocation = insn->readerCursor - 2; - - dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { + if (insn->mode == MODE_64BIT || byte1 & 0x8) { + insn->vexSize = 2; + } + else { + unconsumeByte(insn); + } + + if (insn->vexSize == 2) { + insn->vexPrefix[0] = byte; + consumeByte(insn, &insn->vexPrefix[1]); + + insn->rexPrefix = 0x40 + | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + + switch (ppFromVEX2of2(insn->vexPrefix[1])) + { + default: + break; + case VEX_PREFIX_66: + hasOpSize = TRUE; + break; + } + + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); + } + } + else { + if (insn->mode == MODE_64BIT) { + if ((byte & 0xf0) == 0x40) { + uint8_t opcodeByte; + + if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { + dbgprintf(insn, "Redundant REX prefix"); + return -1; + } + + insn->rexPrefix = byte; + insn->necessaryPrefixLocation = insn->readerCursor - 2; + + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); + } else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; } - + if (insn->mode == MODE_16BIT) { insn->registerSize = (hasOpSize ? 4 : 2); insn->addressSize = (hasAdSize ? 
4 : 2); @@ -438,6 +530,39 @@ static int readOpcode(struct InternalInstruction* insn) { dbgprintf(insn, "readOpcode()"); insn->opcodeType = ONEBYTE; + + if (insn->vexSize == 3) + { + switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) + { + default: + dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); + return -1; + case 0: + break; + case VEX_LOB_0F: + insn->twoByteEscape = 0x0f; + insn->opcodeType = TWOBYTE; + return consumeByte(insn, &insn->opcode); + case VEX_LOB_0F38: + insn->twoByteEscape = 0x0f; + insn->threeByteEscape = 0x38; + insn->opcodeType = THREEBYTE_38; + return consumeByte(insn, &insn->opcode); + case VEX_LOB_0F3A: + insn->twoByteEscape = 0x0f; + insn->threeByteEscape = 0x3a; + insn->opcodeType = THREEBYTE_3A; + return consumeByte(insn, &insn->opcode); + } + } + else if (insn->vexSize == 2) + { + insn->twoByteEscape = 0x0f; + insn->opcodeType = TWOBYTE; + return consumeByte(insn, &insn->opcode); + } + if (consumeByte(insn, &current)) return -1; @@ -467,6 +592,24 @@ static int readOpcode(struct InternalInstruction* insn) { return -1; insn->opcodeType = THREEBYTE_3A; + } else if (current == 0xa6) { + dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); + + insn->threeByteEscape = current; + + if (consumeByte(insn, &current)) + return -1; + + insn->opcodeType = THREEBYTE_A6; + } else if (current == 0xa7) { + dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); + + insn->threeByteEscape = current; + + if (consumeByte(insn, &current)) + return -1; + + insn->opcodeType = THREEBYTE_A7; } else { dbgprintf(insn, "Didn't find a three-byte escape prefix"); @@ -600,20 +743,64 @@ static int getID(struct InternalInstruction* insn) { dbgprintf(insn, "getID()"); attrMask = ATTR_NONE; - + if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; - - if (insn->rexPrefix & 0x08) - attrMask |= ATTR_REXW; - - if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) - attrMask |= ATTR_OPSIZE; - else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XS; - else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XD; - + + if (insn->vexSize) { + attrMask |= ATTR_VEX; + + if (insn->vexSize == 3) { + switch (ppFromVEX3of3(insn->vexPrefix[2])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (wFromVEX3of3(insn->vexPrefix[2])) + attrMask |= ATTR_REXW; + if (lFromVEX3of3(insn->vexPrefix[2])) + attrMask |= ATTR_VEXL; + } + else if (insn->vexSize == 2) { + switch (ppFromVEX2of2(insn->vexPrefix[1])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (lFromVEX2of2(insn->vexPrefix[1])) + attrMask |= ATTR_VEXL; + } + else { + return -1; + } + } + else { + if (insn->rexPrefix & 0x08) + attrMask |= ATTR_REXW; + + if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + attrMask |= ATTR_OPSIZE; + else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) + attrMask |= ATTR_XS; + else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) + attrMask |= ATTR_XD; + + } + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; @@ -749,7 +936,7 @@ static int readSIB(struct InternalInstruction* insn) { insn->sibIndex = SIB_INDEX_NONE; break; default: - insn->sibIndex =
(EABase)(sibIndexBase + index); + insn->sibIndex = (SIBIndex)(sibIndexBase + index); if (insn->sibIndex == SIB_INDEX_sib || insn->sibIndex == SIB_INDEX_sib64) insn->sibIndex = SIB_INDEX_NONE; @@ -796,7 +983,7 @@ static int readSIB(struct InternalInstruction* insn) { } break; default: - insn->sibBase = (EABase)(sibBaseBase + base); + insn->sibBase = (SIBBase)(sibBaseBase + base); break; } @@ -1012,6 +1199,8 @@ static int readModRM(struct InternalInstruction* insn) { return prefix##_EAX + index; \ case TYPE_R64: \ return prefix##_RAX + index; \ + case TYPE_XMM256: \ + return prefix##_YMM0 + index; \ case TYPE_XMM128: \ case TYPE_XMM64: \ case TYPE_XMM32: \ @@ -1073,6 +1262,14 @@ static int fixupReg(struct InternalInstruction *insn, default: debug("Expected a REG or R/M encoding in fixupReg"); return -1; + case ENCODING_VVVV: + insn->vvvv = (Reg)fixupRegValue(insn, + (OperandType)op->type, + insn->vvvv, + &valid); + if (!valid) + return -1; + break; case ENCODING_REG: insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, @@ -1237,6 +1434,27 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { } /* + * readVVVV - Consumes an immediate operand from an instruction, given the + * desired operand size. + * + * @param insn - The instruction whose operand is to be read. + * @return - 0 if the immediate was successfully consumed; nonzero + * otherwise. + */ +static int readVVVV(struct InternalInstruction* insn) { + dbgprintf(insn, "readVVVV()"); + + if (insn->vexSize == 3) + insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); + else if (insn->vexSize == 2) + insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); + else + return -1; + + return 0; +} + +/* * readOperands - Consults the specifier for an instruction and consumes all * operands for that instruction, interpreting them as it goes. 
* @@ -1317,6 +1535,13 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_I: if (readOpcodeModifier(insn)) return -1; + break; + case ENCODING_VVVV: + if (readVVVV(insn)) + return -1; + if (fixupReg(insn, &insn->spec->operands[index])) + return -1; + break; case ENCODING_DUP: break; default: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index d0dc8b5..a9c90f8 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -34,16 +34,30 @@ extern "C" { /* * Accessor functions for various fields of an Intel instruction */ -#define modFromModRM(modRM) ((modRM & 0xc0) >> 6) -#define regFromModRM(modRM) ((modRM & 0x38) >> 3) -#define rmFromModRM(modRM) (modRM & 0x7) -#define scaleFromSIB(sib) ((sib & 0xc0) >> 6) -#define indexFromSIB(sib) ((sib & 0x38) >> 3) -#define baseFromSIB(sib) (sib & 0x7) -#define wFromREX(rex) ((rex & 0x8) >> 3) -#define rFromREX(rex) ((rex & 0x4) >> 2) -#define xFromREX(rex) ((rex & 0x2) >> 1) -#define bFromREX(rex) (rex & 0x1) +#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) +#define regFromModRM(modRM) (((modRM) & 0x38) >> 3) +#define rmFromModRM(modRM) ((modRM) & 0x7) +#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6) +#define indexFromSIB(sib) (((sib) & 0x38) >> 3) +#define baseFromSIB(sib) ((sib) & 0x7) +#define wFromREX(rex) (((rex) & 0x8) >> 3) +#define rFromREX(rex) (((rex) & 0x4) >> 2) +#define xFromREX(rex) (((rex) & 0x2) >> 1) +#define bFromREX(rex) ((rex) & 0x1) + +#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) +#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) +#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) +#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f) +#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7) +#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3) +#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2) +#define ppFromVEX3of3(vex) ((vex) & 0x3) + +#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7) +#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3) +#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2) +#define ppFromVEX2of2(vex) ((vex) & 0x3) /* * These enums represent Intel registers for use by the decoder. 
@@ -206,7 +220,25 @@ extern "C" { ENTRY(XMM13) \ ENTRY(XMM14) \ ENTRY(XMM15) - + +#define REGS_YMM \ + ENTRY(YMM0) \ + ENTRY(YMM1) \ + ENTRY(YMM2) \ + ENTRY(YMM3) \ + ENTRY(YMM4) \ + ENTRY(YMM5) \ + ENTRY(YMM6) \ + ENTRY(YMM7) \ + ENTRY(YMM8) \ + ENTRY(YMM9) \ + ENTRY(YMM10) \ + ENTRY(YMM11) \ + ENTRY(YMM12) \ + ENTRY(YMM13) \ + ENTRY(YMM14) \ + ENTRY(YMM15) + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -252,6 +284,7 @@ extern "C" { REGS_64BIT \ REGS_MMX \ REGS_XMM \ + REGS_YMM \ REGS_SEGMENT \ REGS_DEBUG \ REGS_CONTROL \ @@ -332,6 +365,27 @@ typedef enum { SEG_OVERRIDE_GS, SEG_OVERRIDE_max } SegmentOverride; + +/* + * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field + */ + +typedef enum { + VEX_LOB_0F = 0x1, + VEX_LOB_0F38 = 0x2, + VEX_LOB_0F3A = 0x3 +} VEXLeadingOpcodeByte; + +/* + * VEXPrefixCode - Possible values for the VEX.pp field + */ + +typedef enum { + VEX_PREFIX_NONE = 0x0, + VEX_PREFIX_66 = 0x1, + VEX_PREFIX_F3 = 0x2, + VEX_PREFIX_F2 = 0x3 +} VEXPrefixCode; typedef uint8_t BOOL; @@ -389,10 +443,12 @@ struct InternalInstruction { uint8_t prefixPresent[0x100]; /* contains the location (for use with the reader) of the prefix byte */ uint64_t prefixLocations[0x100]; + /* The value of the VEX prefix, if present */ + uint8_t vexPrefix[3]; + /* The length of the VEX prefix (0 if not present) */ + uint8_t vexSize; /* The value of the REX prefix, if present */ uint8_t rexPrefix; - /* The location of the REX prefix */ - uint64_t rexLocation; /* The location where a mandatory prefix would have to be (i.e., right before the opcode, or right before the REX prefix if one is present) */ uint64_t necessaryPrefixLocation; @@ -428,6 +484,10 @@ struct InternalInstruction { /* state for additional bytes, consumed during operand decode. 
Pattern: consumed___ indicates that the byte was already consumed and does not need to be consumed again */ + + /* The VEX.vvvv field, which contains a third register operand for some AVX + instructions */ + Reg vvvv; /* The ModR/M byte, which contains most register operands and some portion of all memory operands */ diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 1425b86..70315ed 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -30,6 +30,8 @@ #define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes #define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes #define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes +#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes +#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes #define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers" #define CONTEXTS_STR "x86DisassemblerContexts" @@ -37,6 +39,8 @@ #define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes" #define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes" #define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes" +#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes" +#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes" /* * Attributes of an instruction that must be known before the opcode can be @@ -49,7 +53,9 @@ ENUM_ENTRY(ATTR_XS, 0x02) \ ENUM_ENTRY(ATTR_XD, 0x04) \ ENUM_ENTRY(ATTR_REXW, 0x08) \ - ENUM_ENTRY(ATTR_OPSIZE, 0x10) + ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ + ENUM_ENTRY(ATTR_VEX, 0x20) \ + ENUM_ENTRY(ATTR_VEXL, 0x40) #define ENUM_ENTRY(n, v) n = v, enum attributeBits { @@ -87,7 +93,20 @@ enum attributeBits { "IC_64BIT_REXW_XS") \ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! 
Prefer over all " \ "else because this changes most " \ - "operands' meaning") + "operands' meaning") \ + ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \ + ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \ + ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \ + ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \ + ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \ + ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \ + ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \ + ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ + ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ + ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") + #define ENUM_ENTRY(n, r, d) n, typedef enum { @@ -104,7 +123,9 @@ typedef enum { ONEBYTE = 0, TWOBYTE = 1, THREEBYTE_38 = 2, - THREEBYTE_3A = 3 + THREEBYTE_3A = 3, + THREEBYTE_A6 = 4, + THREEBYTE_A7 = 5 } OpcodeType; /* @@ -183,6 +204,7 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_NONE, "") \ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \ + ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \ ENUM_ENTRY(ENCODING_CW, "2-byte") \ ENUM_ENTRY(ENCODING_CD, "4-byte") \ @@ -278,6 +300,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \ ENUM_ENTRY(TYPE_XMM64, "8-byte") \ ENUM_ENTRY(TYPE_XMM128, "16-byte") \ + ENUM_ENTRY(TYPE_XMM256, "32-byte") \ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index d6950f4..dd6e353 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" +#include "X86Subtarget.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -22,24 +23,38 @@ #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "X86GenInstrNames.inc" +#include <map> using namespace llvm; // Include the auto-generated portion of the assembly writer. #define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "X86GenRegisterNames.inc" #include "X86GenAsmWriter.inc" +#undef PRINT_ALIAS_INSTR +#undef GET_INSTRUCTION_NAME + +X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) { + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures( + &TM.getSubtarget<X86Subtarget>())); +} void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { - printInstruction(MI, OS); + // Try to print any aliases first. + if (!printAliasInstr(MI, OS)) + printInstruction(MI, OS); // If verbose assembly is enabled, we can print some informative comments. 
if (CommentStream) EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } + StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); } - void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index eb98664..8d69391 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -17,16 +17,24 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { - class MCOperand; + +class MCOperand; +class X86Subtarget; +class TargetMachine; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} - + X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI); virtual void printInst(const MCInst *MI, raw_ostream &OS); virtual StringRef getOpcodeName(unsigned Opcode) const; + // Methods used to print the alias of an instruction. + unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; + // Autogenerated by tblgen, returns true if we successfully printed an + // alias. + bool printAliasInstr(const MCInst *MI, raw_ostream &OS); + // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 12144e3..c642acc 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // FALL THROUGH. case X86::PUNPCKLBWrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(16, ShuffleMask); + DecodePUNPCKLBWMask(16, ShuffleMask); break; case X86::PUNPCKLWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLWDrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(8, ShuffleMask); + DecodePUNPCKLWDMask(8, ShuffleMask); break; case X86::PUNPCKLDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(4, ShuffleMask); + DecodePUNPCKLDQMask(4, ShuffleMask); break; case X86::PUNPCKLQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLQDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(2, ShuffleMask); + DecodePUNPCKLQDQMask(2, ShuffleMask); break; case X86::SHUFPDrri: @@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPDrm: - DecodeUNPCKLPMask(2, ShuffleMask); + DecodeUNPCKLPDMask(2, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKLPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPDrm: + DecodeUNPCKLPDMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + case X86::VUNPCKLPDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::VUNPCKLPDYrm: + DecodeUNPCKLPDMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; case X86::UNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPSrm: - DecodeUNPCKLPMask(4, ShuffleMask); + DecodeUNPCKLPSMask(4, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPSrm: + DecodeUNPCKLPSMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + case X86::VUNPCKLPSYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPSYrm: + DecodeUNPCKLPSMask(8, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; case X86::UNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 0484529..47253eb 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" +#include "X86Subtarget.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 6f12032..ca99dc0 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -18,13 +18,15 @@ #include "llvm/Support/raw_ostream.h" namespace llvm { - class MCOperand; + +class MCOperand; +class TargetMachine; class X86IntelInstPrinter : public MCInstPrinter { public: - X86IntelInstPrinter(const MCAsmInfo &MAI) + X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} - + virtual void printInst(const MCInst *MI, raw_ostream &OS); virtual StringRef getOpcodeName(unsigned Opcode) const; @@ -33,7 +35,6 @@ public: static const char *getRegisterName(unsigned RegNo); static const char *getInstructionName(unsigned Opcode); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index e21d69a..e7429a3 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -36,7 +36,7 @@ _conv: cmovb %rcx, %rax ret -Seems like the jb branch has high likelyhood of being taken. It would have +Seems like the jb branch has high likelihood of being taken. It would have saved a few instructions. //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index abd1515..ea3014e 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -7,14 +7,6 @@ copy (3-addr bswap + memory support?) This is available on Atom processors. //===---------------------------------------------------------------------===// -CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86 -backend knows how to three-addressify this shift, but it appears the register -allocator isn't even asking it to do so in this case. 
We should investigate -why this isn't happening, it could have significant impact on other important -cases for X86 as well. - -//===---------------------------------------------------------------------===// - This should be one DIV/IDIV instruction, not a libcall: unsigned test(unsigned long long X, unsigned Y) { @@ -1572,7 +1564,7 @@ Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations: 1. ix86_pad_returns inserts a noop before ret instructions if immediately - preceeded by a conditional branch or is the target of a jump. + preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. @@ -1656,28 +1648,61 @@ information to add the "lock" prefix. //===---------------------------------------------------------------------===// -_Bool bar(int *x) { return *x & 1; } +struct B { + unsigned char y0 : 1; +}; -define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { -entry: - %tmp1 = load i32* %x ; <i32> [#uses=1] - %and = and i32 %tmp1, 1 ; <i32> [#uses=1] - %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1] - ret i1 %tobool +int bar(struct B* a) { return a->y0; } + +define i32 @bar(%struct.B* nocapture %a) nounwind readonly optsize { + %1 = getelementptr inbounds %struct.B* %a, i64 0, i32 0 + %2 = load i8* %1, align 1 + %3 = and i8 %2, 1 + %4 = zext i8 %3 to i32 + ret i32 %4 } -bar: # @bar -# BB#0: # %entry - movl 4(%esp), %eax - movb (%eax), %al - andb $1, %al - movzbl %al, %eax - ret +bar: # @bar +# BB#0: + movb (%rdi), %al + andb $1, %al + movzbl %al, %eax + ret Missed optimization: should be movl+andl. //===---------------------------------------------------------------------===// +The x86_64 abi says: + +Booleans, when stored in a memory object, are stored as single byte objects the +value of which is always 0 (false) or 1 (true). + +We are not using this fact: + +int bar(_Bool *a) { return *a; } + +define i32 @bar(i8* nocapture %a) nounwind readonly optsize { + %1 = load i8* %a, align 1, !tbaa !0 + %tmp = and i8 %1, 1 + %2 = zext i8 %tmp to i32 + ret i32 %2 +} + +bar: + movb (%rdi), %al + andb $1, %al + movzbl %al, %eax + ret + +GCC produces + +bar: + movzbl (%rdi), %eax + ret + +//===---------------------------------------------------------------------===// + Consider the following two functions compiled with clang: _Bool foo(int *x) { return !(*x & 4); } unsigned bar(int *x) { return !(*x & 4); } @@ -1703,26 +1728,6 @@ are functionally identical. //===---------------------------------------------------------------------===// Take the following C code: -int x(int y) { return (y & 63) << 14; } - -Code produced by gcc: - andl $63, %edi - sall $14, %edi - movl %edi, %eax - ret - -Code produced by clang: - shll $14, %edi - movl %edi, %eax - andl $1032192, %eax - ret - -The code produced by gcc is 3 bytes shorter. This sort of construct often -shows up with bitfields. - -//===---------------------------------------------------------------------===// - -Take the following C code: int f(int a, int b) { return (unsigned char)a == (unsigned char)b; } We generate the following IR with clang: @@ -1947,3 +1952,91 @@ which is "perfect". 
//===---------------------------------------------------------------------===// +For the branch in the following code: +int a(); +int b(int x, int y) { + if (x & (1<<(y&7))) + return a(); + return y; +} + +We currently generate: + movb %sil, %al + andb $7, %al + movzbl %al, %eax + btl %eax, %edi + jae .LBB0_2 + +movl+andl would be shorter than the movb+andb+movzbl sequence. + +//===---------------------------------------------------------------------===// + +For the following: +struct u1 { + float x, y; +}; +float foo(struct u1 u) { + return u.x + u.y; +} + +We currently generate: + movdqa %xmm0, %xmm1 + pshufd $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0,0,0] + addss %xmm1, %xmm0 + ret + +We could save an instruction here by commuting the addss. + +//===---------------------------------------------------------------------===// + +This (from PR9661): + +float clamp_float(float a) { + if (a > 1.0f) + return 1.0f; + else if (a < 0.0f) + return 0.0f; + else + return a; +} + +Could compile to: + +clamp_float: # @clamp_float + movss .LCPI0_0(%rip), %xmm1 + minss %xmm1, %xmm0 + pxor %xmm1, %xmm1 + maxss %xmm1, %xmm0 + ret + +with -ffast-math. + +//===---------------------------------------------------------------------===// + +This function (from PR9803): + +int clamp2(int a) { + if (a > 5) + a = 5; + if (a < 0) + return 0; + return a; +} + +Compiles to: + +_clamp2: ## @clamp2 + pushq %rbp + movq %rsp, %rbp + cmpl $5, %edi + movl $5, %ecx + cmovlel %edi, %ecx + testl %ecx, %ecx + movl $0, %eax + cmovnsl %ecx, %eax + popq %rbp + ret + +The move of 0 could be scheduled above the test to make it an xor reg,reg. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 1287977..cd06060 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -1,4 +1,4 @@ -//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===// +//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// // // The LLVM Compiler Infrastructure // @@ -95,12 +95,29 @@ void DecodePSHUFLWMask(unsigned Imm, ShuffleMask.push_back(7); } -void DecodePUNPCKLMask(unsigned NElts, +void DecodePUNPCKLBWMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask); +} + +void DecodePUNPCKLWDMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask); +} + +void DecodePUNPCKLDQMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); +} + +void DecodePUNPCKLQDQMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); +} + +void DecodePUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i); - ShuffleMask.push_back(i+NElts); - } + DecodeUNPCKLPMask(VT, ShuffleMask); } void DecodePUNPCKHMask(unsigned NElts, @@ -133,15 +150,40 @@ void DecodeUNPCKHPMask(unsigned NElts, } } +void DecodeUNPCKLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); +} + +void DecodeUNPCKLPDMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts),
ShuffleMask); +} /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. NElts indicates the number of elements in the vector allowing it to -/// handle different datatypes and vector widths. -void DecodeUNPCKLPMask(unsigned NElts, +/// etc. VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i); // Reads from dest - ShuffleMask.push_back(i+NElts); // Reads from src + unsigned NumElts = VT.getVectorNumElements(); + + // Handle vector lengths > 128 bits. Define a "section" as a set of + // 128 bits. AVX defines UNPCK* to operate independently on 128-bit + // sections. + unsigned NumSections = VT.getSizeInBits() / 128; + if (NumSections == 0 ) NumSections = 1; // Handle MMX + unsigned NumSectionElts = NumElts / NumSections; + + unsigned Start = 0; + unsigned End = NumSectionElts / 2; + for (unsigned s = 0; s < NumSections; ++s) { + for (unsigned i = Start; i != End; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2 + } + // Process the next 128 bits. + Start += NumSectionElts; + End += NumSectionElts; } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 50d9ccb..b18f670 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -16,6 +16,7 @@ #define X86_SHUFFLE_DECODE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ValueTypes.h" //===----------------------------------------------------------------------===// // Vector Mask Decoding @@ -45,7 +46,19 @@ void DecodePSHUFHWMask(unsigned Imm, void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); -void DecodePUNPCKLMask(unsigned NElts, +void DecodePUNPCKLBWMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLWDMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLDQMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLQDQMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); void DecodePUNPCKHMask(unsigned NElts, @@ -57,11 +70,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, void DecodeUNPCKHPMask(unsigned NElts, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodeUNPCKLPDMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. NElts indicates the number of elements in the vector allowing it to -/// handle different datatypes and vector widths. -void DecodeUNPCKLPMask(unsigned NElts, +/// etc. VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); } // llvm namespace diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index efb6c8c..25b8d3e 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -1,13 +1,13 @@ //===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. 
See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // -// This is a target description file for the Intel i386 architecture, refered to +// This is a target description file for the Intel i386 architecture, referred to // here as the "X86" architecture. // //===----------------------------------------------------------------------===// @@ -32,7 +32,7 @@ def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions", // SSE codegen depends on cmovs, and all - // SSE1+ processors support them. + // SSE1+ processors support them. [FeatureMMX, FeatureCMOV]>; def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions", @@ -50,7 +50,8 @@ def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", [FeatureSSE41, FeaturePOPCNT]>; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", - "Enable 3DNow! instructions">; + "Enable 3DNow! instructions", + [FeatureMMX]>; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", "Enable 3DNow! Athlon instructions", [Feature3DNow]>; @@ -125,10 +126,10 @@ def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit, FeatureAES, FeatureCLMUL]>; def : Proc<"k6", [FeatureMMX]>; -def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; -def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>; -def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>; -def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"k6-2", [Feature3DNow]>; +def : Proc<"k6-3", [Feature3DNow]>; +def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>; def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; @@ -156,8 +157,8 @@ def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureMMX]>; -def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; -def : Proc<"c3", [FeatureMMX, Feature3DNow]>; +def : Proc<"winchip2", [Feature3DNow]>; +def : Proc<"c3", [Feature3DNow]>; def : Proc<"c3-2", [FeatureSSE1]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index da5f5b1..4d7d96d 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Object/MachOFormat.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -28,6 +29,13 @@ #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; +// Option to allow disabling arithmetic relaxation to workaround PR9807, which +// is useful when running bitwise comparison experiments on Darwin. We should be +// able to remove this once PR9807 is resolved. 
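+// (A sketch of the intended use, assuming a tool that links this backend,
+// e.g. llvm-mc: with -mc-x86-disable-arith-relaxation set,
+// MayNeedRelaxation() below still relaxes branches but returns false for
+// arithmetic instructions, leaving their short imm8 forms in place.)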
+static cl::opt<bool> +MCDisableArithRelaxation("mc-x86-disable-arith-relaxation", + cl::desc("Disable relaxation of arithmetic instruction for X86")); + static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: assert(0 && "invalid fixup kind!"); @@ -201,6 +209,9 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode()) return true; + if (MCDisableArithRelaxation) + return false; + // Check if this instruction is ever relaxable. if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode()) return false; @@ -307,10 +318,13 @@ public: : ELFX86AsmBackend(T, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(new X86ELFObjectWriter(false, OSType, - ELF::EM_386, false), + return createELFObjectWriter(createELFObjectTargetWriter(), OS, /*IsLittleEndian*/ true); } + + MCELFObjectTargetWriter *createELFObjectTargetWriter() const { + return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false); + } }; class ELFX86_64AsmBackend : public ELFX86AsmBackend { @@ -319,10 +333,13 @@ public: : ELFX86AsmBackend(T, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(new X86ELFObjectWriter(true, OSType, - ELF::EM_X86_64, true), + return createELFObjectWriter(createELFObjectTargetWriter(), OS, /*IsLittleEndian*/ true); } + + MCELFObjectTargetWriter *createELFObjectTargetWriter() const { + return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true); + } }; class WindowsX86AsmBackend : public X86AsmBackend { @@ -408,34 +425,26 @@ public: TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return new DarwinX86_32AsmBackend(T); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - if (Triple(TT).getEnvironment() == Triple::MachO) - return new DarwinX86_32AsmBackend(T); - else - return new WindowsX86AsmBackend(T, false); - default: - return new ELFX86_32AsmBackend(T, Triple(TT).getOS()); - } + + if (TheTriple.isOSWindows()) + return new WindowsX86AsmBackend(T, false); + + return new ELFX86_32AsmBackend(T, TheTriple.getOS()); } TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return new DarwinX86_64AsmBackend(T); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - if (Triple(TT).getEnvironment() == Triple::MachO) - return new DarwinX86_64AsmBackend(T); - else - return new WindowsX86AsmBackend(T, true); - default: - return new ELFX86_64AsmBackend(T, Triple(TT).getOS()); - } + + if (TheTriple.isOSWindows()) + return new WindowsX86AsmBackend(T, true); + + return new ELFX86_64AsmBackend(T, TheTriple.getOS()); } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 99b4479..c2d53c4 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -709,12 +709,13 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, //===----------------------------------------------------------------------===// static MCInstPrinter *createX86MCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { 
if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI); + return new X86ATTInstPrinter(TM, MAI); if (SyntaxVariant == 1) - return new X86IntelInstPrinter(MAI); + return new X86IntelInstPrinter(TM, MAI); return 0; } diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index a44fb69..5635175 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -215,6 +215,13 @@ def CC_X86_Win64_C : CallingConv<[ // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], [XMM0, XMM1, XMM2, XMM3]>>, + + // Do not pass the sret argument in RCX, the Win64 thiscall calling + // convention requires "this" to be passed in RCX. + CCIfCC<"CallingConv::X86_ThisCall", + CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ], + [XMM1, XMM2, XMM3]>>>>, + CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ], [XMM0, XMM1, XMM2, XMM3]>>, diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 60d9d4a..421e221 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -652,6 +652,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::TB: // Two-byte opcode prefix case X86II::T8: // 0F 38 case X86II::TA: // 0F 3A + case X86II::A6: // 0F A6 + case X86II::A7: // 0F A7 Need0FPrefix = true; break; case X86II::TF: // F2 0F 38 @@ -695,6 +697,12 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::TA: // 0F 3A MCE.emitByte(0x3A); break; + case X86II::A6: // 0F A6 + MCE.emitByte(0xA6); + break; + case X86II::A7: // 0F A7 + MCE.emitByte(0xA7); + break; } // If this is a two-address instruction, skip one of the register operands. diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 6fa9284..1382f18 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -23,6 +23,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -77,10 +78,8 @@ private: bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(EVT VT, const Value *Val, - const X86AddressMode &AM); - bool X86FastEmitStore(EVT VT, unsigned Val, - const X86AddressMode &AM); + bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM); + bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); @@ -125,6 +124,8 @@ private: unsigned TargetMaterializeAlloca(const AllocaInst *C); + unsigned TargetMaterializeFloatZero(const ConstantFP *CF); + /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. bool isScalarFPTypeInSSEReg(EVT VT) const { @@ -133,6 +134,9 @@ private: } bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); + + bool TryEmitSmallMemcpy(X86AddressMode DestAM, + X86AddressMode SrcAM, uint64_t Len); }; } // end anonymous namespace. @@ -224,8 +228,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. 
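/// (For example, storing an i32 register through an address mode of
/// {Base, Disp} selects a single MOV32mr; the switch below only picks the
/// width-appropriate opcode.)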
bool -X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, - const X86AddressMode &AM) { +X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -395,37 +398,45 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { const Value *Op = *i; if (const StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); - unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); - Disp += SL->getElementOffset(Idx); - } else { - uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - SmallVector<const Value *, 4> Worklist; - Worklist.push_back(Op); - do { - Op = Worklist.pop_back_val(); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { - // Constant-offset addressing. - Disp += CI->getSExtValue() * S; - } else if (isa<AddOperator>(Op) && - isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { - // An add with a constant operand. Fold the constant. - ConstantInt *CI = - cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); - Disp += CI->getSExtValue() * S; - // Add the other operand back to the work list. - Worklist.push_back(cast<AddOperator>(Op)->getOperand(0)); - } else if (IndexReg == 0 && - (!AM.GV || !Subtarget->isPICStyleRIPRel()) && - (S == 1 || S == 2 || S == 4 || S == 8)) { - // Scaled-index addressing. - Scale = S; - IndexReg = getRegForGEPIndex(Op).first; - if (IndexReg == 0) - return false; - } else - // Unsupported. - goto unsupported_gep; - } while (!Worklist.empty()); + Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue()); + continue; + } + + // A array/variable index is always of the form i*S where S is the + // constant scale size. See if we can push the scale into immediates. + uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + for (;;) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // Constant-offset addressing. + Disp += CI->getSExtValue() * S; + break; + } + if (isa<AddOperator>(Op) && + (!isa<Instruction>(Op) || + FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()] + == FuncInfo.MBB) && + isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { + // An add (in the same block) with a constant operand. Fold the + // constant. + ConstantInt *CI = + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + Disp += CI->getSExtValue() * S; + // Iterate on the other operand. + Op = cast<AddOperator>(Op)->getOperand(0); + continue; + } + if (IndexReg == 0 && + (!AM.GV || !Subtarget->isPICStyleRIPRel()) && + (S == 1 || S == 2 || S == 4 || S == 8)) { + // Scaled-index addressing. + Scale = S; + IndexReg = getRegForGEPIndex(Op).first; + if (IndexReg == 0) + return false; + break; + } + // Unsupported. + goto unsupported_gep; } } // Check for displacement overflow. @@ -439,7 +450,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { if (X86SelectAddress(U->getOperand(0), AM)) return true; - // If we couldn't merge the sub value into this addr mode, revert back to + // If we couldn't merge the gep value into this addr mode, revert back to // our address and just match the value instead of completely failing. AM = SavedAM; break; @@ -451,91 +462,91 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Handle constant address. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { - // Can't handle alternate code models yet. + // Can't handle alternate code models or TLS yet. 
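+    // (Non-small code models can require 64-bit absolute relocations, and
+    // TLS references need a dedicated lowering sequence, so both stay on
+    // the SelectionDAG path.)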
if (TM.getCodeModel() != CodeModel::Small) return false; - // RIP-relative addresses can't have additional register operands. - if (Subtarget->isPICStyleRIPRel() && - (AM.Base.Reg != 0 || AM.IndexReg != 0)) - return false; - - // Can't handle TLS yet. if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal()) return false; + + // RIP-relative addresses can't have additional register operands, so if + // we've already folded stuff into the addressing mode, just force the + // global value into its own register, which we can use as the basereg. + if (!Subtarget->isPICStyleRIPRel() || + (AM.Base.Reg == 0 && AM.IndexReg == 0)) { + // Okay, we've committed to selecting this global. Set up the address. + AM.GV = GV; + + // Allow the subtarget to classify the global. + unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); + + // If this reference is relative to the pic base, set it now. + if (isGlobalRelativeToPICBase(GVFlags)) { + // FIXME: How do we know Base.Reg is free?? + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); + } - // Okay, we've committed to selecting this global. Set up the basic address. - AM.GV = GV; - - // Allow the subtarget to classify the global. - unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); - - // If this reference is relative to the pic base, set it now. - if (isGlobalRelativeToPICBase(GVFlags)) { - // FIXME: How do we know Base.Reg is free?? - AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); - } - - // Unless the ABI requires an extra load, return a direct reference to - // the global. - if (!isGlobalStubReference(GVFlags)) { - if (Subtarget->isPICStyleRIPRel()) { - // Use rip-relative addressing if we can. Above we verified that the - // base and index registers are unused. - assert(AM.Base.Reg == 0 && AM.IndexReg == 0); - AM.Base.Reg = X86::RIP; + // Unless the ABI requires an extra load, return a direct reference to + // the global. + if (!isGlobalStubReference(GVFlags)) { + if (Subtarget->isPICStyleRIPRel()) { + // Use rip-relative addressing if we can. Above we verified that the + // base and index registers are unused. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); + AM.Base.Reg = X86::RIP; + } + AM.GVOpFlags = GVFlags; + return true; } - AM.GVOpFlags = GVFlags; - return true; - } - // Ok, we need to do a load from a stub. If we've already loaded from this - // stub, reuse the loaded pointer, otherwise emit the load now. - DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V); - unsigned LoadReg; - if (I != LocalValueMap.end() && I->second != 0) { - LoadReg = I->second; - } else { - // Issue load from stub. - unsigned Opc = 0; - const TargetRegisterClass *RC = NULL; - X86AddressMode StubAM; - StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = GV; - StubAM.GVOpFlags = GVFlags; - - // Prepare for inserting code in the local-value area. - SavePoint SaveInsertPt = enterLocalValueArea(); - - if (TLI.getPointerTy() == MVT::i64) { - Opc = X86::MOV64rm; - RC = X86::GR64RegisterClass; - - if (Subtarget->isPICStyleRIPRel()) - StubAM.Base.Reg = X86::RIP; + // Ok, we need to do a load from a stub. If we've already loaded from + // this stub, reuse the loaded pointer, otherwise emit the load now. + DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V); + unsigned LoadReg; + if (I != LocalValueMap.end() && I->second != 0) { + LoadReg = I->second; } else { - Opc = X86::MOV32rm; - RC = X86::GR32RegisterClass; - } + // Issue load from stub. 
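+        // (The loaded stub pointer is cached in LocalValueMap below, so
+        // further references to this global in the same block reuse a
+        // single load.)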
+ unsigned Opc = 0; + const TargetRegisterClass *RC = NULL; + X86AddressMode StubAM; + StubAM.Base.Reg = AM.Base.Reg; + StubAM.GV = GV; + StubAM.GVOpFlags = GVFlags; + + // Prepare for inserting code in the local-value area. + SavePoint SaveInsertPt = enterLocalValueArea(); + + if (TLI.getPointerTy() == MVT::i64) { + Opc = X86::MOV64rm; + RC = X86::GR64RegisterClass; + + if (Subtarget->isPICStyleRIPRel()) + StubAM.Base.Reg = X86::RIP; + } else { + Opc = X86::MOV32rm; + RC = X86::GR32RegisterClass; + } - LoadReg = createResultReg(RC); - MachineInstrBuilder LoadMI = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); - addFullAddress(LoadMI, StubAM); + LoadReg = createResultReg(RC); + MachineInstrBuilder LoadMI = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); + addFullAddress(LoadMI, StubAM); - // Ok, back to normal mode. - leaveLocalValueArea(SaveInsertPt); + // Ok, back to normal mode. + leaveLocalValueArea(SaveInsertPt); - // Prevent loading GV stub multiple times in same MBB. - LocalValueMap[V] = LoadReg; - } + // Prevent loading GV stub multiple times in same MBB. + LocalValueMap[V] = LoadReg; + } - // Now construct the final address. Note that the Disp, Scale, - // and Index values may already be set here. - AM.Base.Reg = LoadReg; - AM.GV = 0; - return true; + // Now construct the final address. Note that the Disp, Scale, + // and Index values may already be set here. + AM.Base.Reg = LoadReg; + AM.GV = 0; + return true; + } } // If all else fails, try to materialize the value in a register. @@ -856,12 +867,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned NEReg = createResultReg(&X86::GR8RegClass); unsigned PReg = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(X86::SETNEr), NEReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(X86::SETPr), PReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(X86::OR8rr), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg) .addReg(PReg).addReg(NEReg); UpdateValueMap(I, ResultReg); return true; @@ -1059,14 +1067,49 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } } } + } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { + // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which + // typically happen for _Bool and C++ bools. + MVT SourceVT; + if (TI->hasOneUse() && TI->getParent() == I->getParent() && + isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) { + unsigned TestOpc = 0; + switch (SourceVT.SimpleTy) { + default: break; + case MVT::i8: TestOpc = X86::TEST8ri; break; + case MVT::i16: TestOpc = X86::TEST16ri; break; + case MVT::i32: TestOpc = X86::TEST32ri; break; + case MVT::i64: TestOpc = X86::TEST64ri32; break; + } + if (TestOpc) { + unsigned OpReg = getRegForValue(TI->getOperand(0)); + if (OpReg == 0) return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc)) + .addReg(OpReg).addImm(1); + + unsigned JmpOpc = X86::JNE_4; + if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { + std::swap(TrueMBB, FalseMBB); + JmpOpc = X86::JE_4; + } + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc)) + .addMBB(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); + return true; + } + } } // Otherwise do a clumsy setcc and re-test it. 
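+  // (We get here when the trunc fast path above does not apply, e.g. the i1
+  // condition was produced in another basic block or has additional uses.)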
+ // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used + // in an explicit cast, so make sure to handle that correctly. unsigned OpReg = getRegForValue(BI->getCondition()); if (OpReg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) - .addReg(OpReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri)) + .addReg(OpReg).addImm(1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB, DL); @@ -1075,42 +1118,42 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } bool X86FastISel::X86SelectShift(const Instruction *I) { - unsigned CReg = 0, OpReg = 0, OpImm = 0; + unsigned CReg = 0, OpReg = 0; const TargetRegisterClass *RC = NULL; if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break; - case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break; - case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; + case Instruction::LShr: OpReg = X86::SHR8rCL; break; + case Instruction::AShr: OpReg = X86::SAR8rCL; break; + case Instruction::Shl: OpReg = X86::SHL8rCL; break; default: return false; } } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break; - case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break; - case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; + case Instruction::LShr: OpReg = X86::SHR16rCL; break; + case Instruction::AShr: OpReg = X86::SAR16rCL; break; + case Instruction::Shl: OpReg = X86::SHL16rCL; break; default: return false; } } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break; - case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break; - case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; + case Instruction::LShr: OpReg = X86::SHR32rCL; break; + case Instruction::AShr: OpReg = X86::SAR32rCL; break; + case Instruction::Shl: OpReg = X86::SHL32rCL; break; default: return false; } } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break; - case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break; - case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break; + case Instruction::LShr: OpReg = X86::SHR64rCL; break; + case Instruction::AShr: OpReg = X86::SAR64rCL; break; + case Instruction::Shl: OpReg = X86::SHL64rCL; break; default: return false; } } else { @@ -1124,15 +1167,6 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned Op0Reg = getRegForValue(I->getOperand(0)); if (Op0Reg == 0) return false; - // Fold immediate in shl(x,3). 
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), - ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); - UpdateValueMap(I, ResultReg); - return true; - } - unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), @@ -1294,10 +1328,61 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) { return false; } +bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, + X86AddressMode SrcAM, uint64_t Len) { + // Make sure we don't bloat code by inlining very large memcpy's. + bool i64Legal = TLI.isTypeLegal(MVT::i64); + if (Len > (i64Legal ? 32 : 16)) return false; + + // We don't care about alignment here since we just emit integer accesses. + while (Len) { + MVT VT; + if (Len >= 8 && i64Legal) + VT = MVT::i64; + else if (Len >= 4) + VT = MVT::i32; + else if (Len >= 2) + VT = MVT::i16; + else { + assert(Len == 1); + VT = MVT::i8; + } + + unsigned Reg; + bool RV = X86FastEmitLoad(VT, SrcAM, Reg); + RV &= X86FastEmitStore(VT, Reg, DestAM); + assert(RV && "Failed to emit load or store??"); + + unsigned Size = VT.getSizeInBits()/8; + Len -= Size; + DestAM.Disp += Size; + SrcAM.Disp += Size; + } + + return true; +} + bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::memcpy: { + const MemCpyInst &MCI = cast<MemCpyInst>(I); + // Don't handle volatile or variable length memcpys. + if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength())) + return false; + + uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue(); + + // Get the address of the dest and source addresses. + X86AddressMode DestAM, SrcAM; + if (!X86SelectAddress(MCI.getRawDest(), DestAM) || + !X86SelectAddress(MCI.getRawSource(), SrcAM)) + return false; + + return TryEmitSmallMemcpy(DestAM, SrcAM, Len); + } + case Intrinsic::stackprotector: { // Emit code inline code to store the stack guard onto the stack. EVT PtrTy = TLI.getPointerTy(); @@ -1308,17 +1393,14 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // Grab the frame index. X86AddressMode AM; if (!X86SelectAddress(Slot, AM)) return false; - if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; - return true; } case Intrinsic::objectsize: { - ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); + // FIXME: This should be moved to generic code! + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); const Type *Ty = I.getCalledFunction()->getReturnType(); - assert(CI && "Non-constant type in Intrinsic::objectsize?"); - MVT VT; if (!isTypeLegal(Ty, VT)) return false; @@ -1356,6 +1438,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { } case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: { + // FIXME: Should fold immediates. + // Replace "add with overflow" intrinsics with an "add" instruction followed // by a seto/setc instruction. Later on, when the "extractvalue" // instructions are encountered, we use the fact that two registers were @@ -1427,8 +1511,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Handle only C and fastcc calling conventions for now. 
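  // (X86_FastCall is accepted as well; any other convention, e.g. GHC, makes
  // the checks below return false so the call falls back to SelectionDAG
  // instruction selection.)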
ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); - if (CC != CallingConv::C && - CC != CallingConv::Fast && + if (CC != CallingConv::C && CC != CallingConv::Fast && CC != CallingConv::X86_FastCall) return false; @@ -1437,14 +1520,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (CC == CallingConv::Fast && GuaranteedTailCallOpt) return false; - // Let SDISel handle vararg functions. const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - if (FTy->isVarArg()) + bool isVarArg = FTy->isVarArg(); + + // Don't know how to handle Win64 varargs yet. Nothing special needed for + // x86-32. Special handling for x86-64 is implemented. + if (isVarArg && Subtarget->isTargetWin64()) return false; // Fast-isel doesn't know about callee-pop yet. - if (Subtarget->IsCalleePop(FTy->isVarArg(), CC)) + if (Subtarget->IsCalleePop(isVarArg, CC)) return false; // Handle *simple* calls for now. @@ -1487,9 +1573,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { ArgFlags.reserve(CS.arg_size()); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - unsigned Arg = getRegForValue(*i); - if (Arg == 0) - return false; + Value *ArgVal = *i; ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; if (CS.paramHasAttr(AttrInd, Attribute::SExt)) @@ -1497,34 +1581,67 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) Flags.setZExt(); + // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra + // instruction. This is safe because it is common to all fastisel supported + // calling conventions on x86. + if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) { + if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 || + CI->getBitWidth() == 16) { + if (Flags.isSExt()) + ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext())); + else + ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext())); + } + } + + unsigned ArgReg; + + // Passing bools around ends up doing a trunc to i1 and passing it. + // Codegen this as an argument + "and 1". + if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) && + cast<TruncInst>(ArgVal)->getParent() == I->getParent() && + ArgVal->hasOneUse()) { + ArgVal = cast<TruncInst>(ArgVal)->getOperand(0); + ArgReg = getRegForValue(ArgVal); + if (ArgReg == 0) return false; + + MVT ArgVT; + if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false; + + ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg, + ArgVal->hasOneUse(), 1); + } else { + ArgReg = getRegForValue(ArgVal); + } + + if (ArgReg == 0) return false; + // FIXME: Only handle *easy* calls for now. if (CS.paramHasAttr(AttrInd, Attribute::InReg) || - CS.paramHasAttr(AttrInd, Attribute::StructRet) || CS.paramHasAttr(AttrInd, Attribute::Nest) || CS.paramHasAttr(AttrInd, Attribute::ByVal)) return false; - const Type *ArgTy = (*i)->getType(); + const Type *ArgTy = ArgVal->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); - Args.push_back(Arg); - ArgVals.push_back(*i); + Args.push_back(ArgReg); + ArgVals.push_back(ArgVal); ArgVTs.push_back(ArgVT); ArgFlags.push_back(Flags); } // Analyze operands of the call, assigning locations to each operand. 
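  // (The small i1/i8/i16 constant arguments were already widened to i32
  // above, so CCState sees the promoted types and no separate extension
  // instruction is emitted for them.)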
SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); + CCState CCInfo(CC, isVarArg, TM, ArgLocs, I->getParent()->getContext()); // Allocate shadow area for Win64 - if (Subtarget->isTargetWin64()) { + if (Subtarget->isTargetWin64()) CCInfo.AllocateStack(32, 8); - } CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86); @@ -1618,6 +1735,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { X86::EBX).addReg(Base); } + if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) { + // Count the number of XMM registers allocated. + static const unsigned XMMArgRegs[] = { + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, + X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 + }; + unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri), + X86::AL).addImm(NumXMMRegs); + } + // Issue the call. MachineInstrBuilder MIB; if (CalleeOp) { @@ -1656,7 +1784,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && (GV->isDeclaration() || GV->isWeakForLinker()) && - Subtarget->getDarwinVers() < 9) { + (!Subtarget->getTargetTriple().isMacOSX() || + Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -1672,14 +1801,20 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (Subtarget->isPICStyleGOT()) MIB.addReg(X86::EBX); + if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) + MIB.addReg(X86::AL); + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); // Issue CALLSEQ_END unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + unsigned NumBytesCallee = 0; + if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet)) + NumBytesCallee = 4; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) - .addImm(NumBytes).addImm(0); + .addImm(NumBytes).addImm(NumBytesCallee); // Now handle call return value (if any). SmallVector<unsigned, 4> UsedRegs; @@ -1850,10 +1985,13 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { if (isa<GlobalValue>(C)) { X86AddressMode AM; if (X86SelectAddress(C, AM)) { - if (TLI.getPointerTy() == MVT::i32) - Opc = X86::LEA32r; - else - Opc = X86::LEA64r; + // If the expression is just a basereg, then we're done, otherwise we need + // to emit an LEA. + if (AM.BaseType == X86AddressMode::RegBase && + AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0) + return AM.Base.Reg; + + Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r; unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg), AM); @@ -1915,6 +2053,45 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { return ResultReg; } +unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { + MVT VT; + if (!isTypeLegal(CF->getType(), VT)) + return false; + + // Get opcode and regclass for the given zero. 
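+  // (With SSE available, FsFLD0SS/FsFLD0SD materialize +0.0 as a register
+  // self-xor rather than a constant-pool load; without SSE we fall back to
+  // fldz via LD_Fp032/LD_Fp064.)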
+ unsigned Opc = 0; + const TargetRegisterClass *RC = NULL; + switch (VT.SimpleTy) { + default: return false; + case MVT::f32: + if (Subtarget->hasSSE1()) { + Opc = X86::FsFLD0SS; + RC = X86::FR32RegisterClass; + } else { + Opc = X86::LD_Fp032; + RC = X86::RFP32RegisterClass; + } + break; + case MVT::f64: + if (Subtarget->hasSSE2()) { + Opc = X86::FsFLD0SD; + RC = X86::FR64RegisterClass; + } else { + Opc = X86::LD_Fp064; + RC = X86::RFP64RegisterClass; + } + break; + case MVT::f80: + // No f80 support yet. + return false; + } + + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); + return ResultReg; +} + + /// TryToFoldLoad - The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 3aaa693..325d061 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1307,7 +1307,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // set up by FpSET_ST0, and our StackTop is off by one because of it. unsigned Op0 = getFPReg(MI->getOperand(0)); // Restore the actual StackTop from before Fp_SET_ST0. - // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we + // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we // are not enforcing the constraint. ++StackTop; unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 0a3f931..06d12fc 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -296,7 +297,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, // FIXME: This is dirty hack. The code itself is pretty mess right now. // It should be rewritten from scratch and generalized sometimes. - // Determine maximum offset (minumum due to stack growth). + // Determine maximum offset (minimum due to stack growth). int64_t MaxOffset = 0; for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) @@ -551,65 +552,71 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // responsible for adjusting the stack pointer. Touching the stack at 4K // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. - if (NumBytes >= 4096 && - (STI.isTargetCygMing() || STI.isTargetWin32()) && - !STI.isTargetEnvMacho()) { + if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) { + const char *StackProbeSymbol; + bool isSPUpdateNeeded = false; + + if (Is64Bit) { + if (STI.isTargetCygMing()) + StackProbeSymbol = "___chkstk"; + else { + StackProbeSymbol = "__chkstk"; + isSPUpdateNeeded = true; + } + } else if (STI.isTargetCygMing()) + StackProbeSymbol = "_alloca"; + else + StackProbeSymbol = "_chkstk"; + // Check whether EAX is livein for this function. bool isEAXAlive = isEAXLiveIn(MF); - const char *StackProbeSymbol = - STI.isTargetWindows() ? 
"_chkstk" : "_alloca"; - if (Is64Bit && STI.isTargetCygMing()) - StackProbeSymbol = "__chkstk"; - unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; - if (!isEAXAlive) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(CallOp)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - } else { + if (isEAXAlive) { + // Sanity check that EAX is not livein for this function. + // It should not be, so throw an assert. + assert(!Is64Bit && "EAX is livein in x64 case!"); + // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) .addReg(X86::EAX, RegState::Kill); + } - // Allocate NumBytes-4 bytes on stack. We'll also use 4 already - // allocated bytes for EAX. + if (Is64Bit) { + // Handle the 64-bit Windows ABI case where we need to call __chkstk. + // Function prologue is responsible for adjusting the stack pointer. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) + .addImm(NumBytes); + } else { + // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. + // We'll also use 4 already allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes - 4); - BuildMI(MBB, MBBI, DL, TII.get(CallOp)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); - MBB.insert(MBBI, MI); + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes); + } + + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32)) + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + + // MSVC x64's __chkstk needs to adjust %rsp. + // FIXME: %rax preserves the offset and should be available. + if (isSPUpdateNeeded) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + TII, *RegInfo); + + if (isEAXAlive) { + // Restore EAX + MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), + X86::EAX), + StackPtr, false, NumBytes - 4); + MBB.insert(MBBI, MI); } - } else if (NumBytes >= 4096 && - STI.isTargetWin64() && - !STI.isTargetEnvMacho()) { - // Sanity check that EAX is not livein for this function. It should - // not be, so throw an assert. - assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!"); - - // Handle the 64-bit Windows ABI case where we need to call __chkstk. - // Function prologue is responsible for adjusting the stack pointer. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32)) - .addExternalSymbol("__chkstk") - .addReg(StackPtr, RegState::Define | RegState::Implicit); - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, - TII, *RegInfo); } else if (NumBytes) emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII, *RegInfo); - if ((NumBytes || PushedRegs) && needsFrameMoves) { + if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. 
MCSymbol *Label = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); @@ -779,7 +786,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, assert(Offset >= 0 && "Offset should never be negative"); if (Offset) { - // Check for possible merge with preceeding ADD instruction. + // Check for possible merge with preceding ADD instruction. Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo); } @@ -823,7 +830,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, int delta = -1*X86FI->getTCReturnAddrDelta(); MBBI = MBB.getLastNonDebugInstr(); - // Check for possible merge with preceeding ADD instruction. + // Check for possible merge with preceding ADD instruction. delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo); } @@ -892,7 +899,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); - bool isWin64 = STI.isTargetWin64(); unsigned SlotSize = STI.is64Bit() ? 8 : 4; unsigned FPReg = TRI->getFrameRegister(MF); unsigned CalleeFrameSize = 0; @@ -900,25 +906,39 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); + if (!X86::GR64RegClass.contains(Reg) && + !X86::GR32RegClass.contains(Reg)) + continue; // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); if (Reg == FPReg) // X86RegisterInfo::emitPrologue will handle spilling of frame register. continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - RC, TRI); - } + CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); } X86FI->setCalleeSavedFrameSize(CalleeFrameSize); + + // Make XMM regs spilled. X86 does not have ability of push/pop XMM. + // It can be done by spilling XMMs to stack frame. + // Note that only Win64 ABI might spill XMMs. + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg)) + continue; + // Add the callee-saved register as live-in. It's killed at the spill. + MBB.addLiveIn(Reg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), + RC, TRI); + } + return true; } @@ -933,21 +953,30 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + // Reload XMMs from stack frame. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + if (X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg)) + continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + RC, TRI); + } + + // POP GPRs. 
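+  // (This mirrors the order in spillCalleeSavedRegisters: XMMs went to
+  // stack slots and GPRs were pushed, so XMMs reload first and GPRs pop
+  // afterwards.)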
unsigned FPReg = TRI->getFrameRegister(MF); - bool isWin64 = STI.isTargetWin64(); unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + if (!X86::GR64RegClass.contains(Reg) && + !X86::GR32RegClass.contains(Reg)) + continue; if (Reg == FPReg) // X86RegisterInfo::emitEpilogue will handle restoring of frame register. continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - BuildMI(MBB, MI, DL, TII.get(Opc), Reg); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, TRI); - } + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); } return true; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 9b0ec6e..4534e85 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1580,6 +1580,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return RetVal; break; } + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + // For operations of the form (x << C1) op C2, check if we can use a smaller + // encoding for C2 by transforming it into (x op (C2>>C1)) << C1. + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse()) + break; + + // i8 is unshrinkable, i16 should be promoted to i32. + if (NVT != MVT::i32 && NVT != MVT::i64) + break; + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + if (!Cst || !ShlCst) + break; + + int64_t Val = Cst->getSExtValue(); + uint64_t ShlVal = ShlCst->getZExtValue(); + + // Make sure that we don't change the operation by removing bits. + // This only matters for OR and XOR, AND is unaffected. + if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) + break; + + unsigned ShlOp, Op = 0; + EVT CstVT = NVT; + + // Check the minimum bitwidth for the new constant. + // TODO: AND32ri is the same as AND64ri32 with zext imm. + // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr + // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. + if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal)) + CstVT = MVT::i8; + else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal)) + CstVT = MVT::i32; + + // Bail if there is no smaller encoding. + if (NVT == CstVT) + break; + + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i32: + assert(CstVT == MVT::i8); + ShlOp = X86::SHL32ri; + + switch (Opcode) { + case ISD::AND: Op = X86::AND32ri8; break; + case ISD::OR: Op = X86::OR32ri8; break; + case ISD::XOR: Op = X86::XOR32ri8; break; + } + break; + case MVT::i64: + assert(CstVT == MVT::i8 || CstVT == MVT::i32); + ShlOp = X86::SHL64ri; + + switch (Opcode) { + case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; + case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; + case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break; + } + break; + } + + // Emit the smaller op and the shift. 
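+    // (For example, with NVT == i32, (x << 8) | 0x1F00 gives Val = 0x1F00
+    // and ShlVal = 8; Val >> ShlVal = 0x1F fits in 8 bits, so we emit
+    // OR32ri8 with 0x1F followed by SHL32ri by 8, saving three bytes of
+    // immediate encoding.)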
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT); + SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); + return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), + getI8Imm(ShlVal)); + break; + } case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2f49dbc..703c01d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,6 +45,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -221,7 +222,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // X86 is weird, it always uses i8 for shift amounts and setcc results. setBooleanContents(ZeroOrOneBooleanContent); - setSchedulingPreference(Sched::RegPressure); + + // For 64-bit since we have so many registers use the ILP scheduler, for + // 32-bit code use the register pressure specific scheduling. + if (Subtarget->is64Bit()) + setSchedulingPreference(Sched::ILP); + else + setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { @@ -543,12 +550,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (Subtarget->is64Bit()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - else - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, + (Subtarget->is64Bit() ? MVT::i64 : MVT::i32), + (Subtarget->isTargetCOFF() + && !Subtarget->isTargetEnvMacho() + ? Custom : Expand)); if (!UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. @@ -921,6 +927,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Can turn SHL into an integer multiply. setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SHL, MVT::v16i8, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Legal); // i8 and i16 vectors are custom , because the source register and source // source memory operand types are not the same width. f32 vectors are @@ -1271,27 +1278,6 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{ return std::make_pair(RRC, Cost); } -// FIXME: Why this routine is here? Move to RegInfo! -unsigned -X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; - switch (RC->getID()) { - default: - return 0; - case X86::GR32RegClassID: - return 4 - FPDiff; - case X86::GR64RegClassID: - return 8 - FPDiff; - case X86::VR128RegClassID: - return Subtarget->is64Bit() ? 
10 : 4; - case X86::VR64RegClassID: - return 4; - } -} - bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const { if (!Subtarget->isTargetLinux()) @@ -1463,6 +1449,20 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { return HasRet; } +EVT +X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, + ISD::NodeType ExtendKind) const { + MVT ReturnMVT; + // TODO: Is this also valid on 32-bit? + if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND) + ReturnMVT = MVT::i8; + else + ReturnMVT = MVT::i32; + + EVT MinVT = getRegisterType(Context, ReturnMVT); + return VT.bitsLT(MinVT) ? MinVT : VT; +} + /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// @@ -1595,6 +1595,18 @@ static bool IsTailCallConvention(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC); } +bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + if (!CI->isTailCall()) + return false; + + CallSite CS(CI); + CallingConv::ID CalleeCC = CS.getCallingConv(); + if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) + return false; + + return true; +} + /// FuncIsMadeTailCallSafe - Return true if the function is being made into /// a tailcall target by changing its ABI. static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) { @@ -1627,8 +1639,9 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { - int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), isImmutable); + unsigned Bytes = Flags.getByValSize(); + if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. + int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable); return DAG.getFrameIndex(FI, getPointerTy()); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, @@ -1765,8 +1778,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall && - CallConv != CallingConv::X86_ThisCall))) { + if (Is64Bit || (CallConv != CallingConv::X86_FastCall && + CallConv != CallingConv::X86_ThisCall)) { FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true)); } if (Is64Bit) { @@ -1818,7 +1831,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, int HomeOffset = TFI.getOffsetOfLocalArea() + 8; FuncInfo->setRegSaveFrameIndex( MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); - FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); + // Fixup to set vararg frame on shadow area (4 x i64). 
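+      // (For example, with two named integer arguments, RCX and RDX are
+      // taken and the index lands on the R8 home slot, which holds the
+      // first anonymous argument.)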
+ if (NumIntRegs < 4) + FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); } else { // For X86-64, if there are vararg parameters that are passed via // registers, then we must store them to their spots on the stack so they @@ -1937,7 +1952,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, return SDValue(OutRetAddr.getNode(), 1); } -/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call +/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call /// optimization is performed and it is required (FPDiff!=0). static SDValue EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, @@ -2028,7 +2043,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue RetAddrFrIdx; - // Load return adress for tail calls. + // Load return address for tail calls. if (isTailCall && FPDiff) Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, FPDiff, dl); @@ -2185,7 +2200,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector<SDValue, 8> MemOpChains2; SDValue FIN; int FI = 0; - // Do not flag preceeding copytoreg stuff together with the following stuff. + // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); if (GuaranteedTailCallOpt) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -2266,7 +2281,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && (GV->isDeclaration() || GV->isWeakForLinker()) && - Subtarget->getDarwinVers() < 9) { + (!Subtarget->getTargetTriple().isMacOSX() || + Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -2285,7 +2301,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, getTargetMachine().getRelocationModel() == Reloc::PIC_) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - Subtarget->getDarwinVers() < 9) { + (!Subtarget->getTargetTriple().isMacOSX() || + Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -3173,7 +3190,8 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { unsigned NumElems = N->getValueType(0).getVectorNumElements(); - if (NumElems != 2 && NumElems != 4) + if ((NumElems != 2 && NumElems != 4) + || N->getValueType(0).getSizeInBits() > 128) return false; for (unsigned i = 0; i < NumElems/2; ++i) @@ -3195,19 +3213,36 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; - if (!isUndefOrEqual(BitI, j)) - return false; - if (V2IsSplat) { - if (!isUndefOrEqual(BitI1, NumElts)) - return false; - } else { - if (!isUndefOrEqual(BitI1, j + NumElts)) + // Handle vector lengths > 128 bits. Define a "section" as a set of + // 128 bits. AVX defines UNPCK* to operate independently on 128-bit + // sections. 
+ unsigned NumSections = VT.getSizeInBits() / 128; + if (NumSections == 0 ) NumSections = 1; // Handle MMX + unsigned NumSectionElts = NumElts / NumSections; + + unsigned Start = 0; + unsigned End = NumSectionElts; + for (unsigned s = 0; s < NumSections; ++s) { + for (unsigned i = Start, j = s * NumSectionElts; + i != End; + i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + if (!isUndefOrEqual(BitI, j)) return false; + if (V2IsSplat) { + if (!isUndefOrEqual(BitI1, NumElts)) + return false; + } else { + if (!isUndefOrEqual(BitI1, j + NumElts)) + return false; + } } + // Process the next 128 bits. + Start += NumSectionElts; + End += NumSectionElts; } + return true; } @@ -3255,14 +3290,27 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) { if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; - if (!isUndefOrEqual(BitI, j)) - return false; - if (!isUndefOrEqual(BitI1, j)) - return false; + // Handle vector lengths > 128 bits. Define a "section" as a set of + // 128 bits. AVX defines UNPCK* to operate independently on 128-bit + // sections. + unsigned NumSections = VT.getSizeInBits() / 128; + if (NumSections == 0 ) NumSections = 1; // Handle MMX + unsigned NumSectionElts = NumElems / NumSections; + + for (unsigned s = 0; s < NumSections; ++s) { + for (unsigned i = s * NumSectionElts, j = s * NumSectionElts; + i != NumSectionElts * (s + 1); + i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + + if (!isUndefOrEqual(BitI, j)) + return false; + if (!isUndefOrEqual(BitI1, j)) + return false; + } } + return true; } @@ -3846,8 +3894,8 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// getShuffleScalarElt - Returns the scalar element that will make up the ith /// element of the result of the vector shuffle. -SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, - unsigned Depth) { +static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, + unsigned Depth) { if (Depth == 6) return SDValue(); // Limit search depth. @@ -3895,11 +3943,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: - DecodePUNPCKLMask(NumElems, ShuffleMask); + DecodePUNPCKLMask(VT, ShuffleMask); break; case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - DecodeUNPCKLPMask(NumElems, ShuffleMask); + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: + DecodeUNPCKLPMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: DecodeMOVHLPSMask(NumElems, ShuffleMask); @@ -3968,7 +4020,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, /// getNumOfConsecutiveZeros - Return the number of elements of a vector /// shuffle operation which come from a consecutively from a zero. The -/// search can start in two diferent directions, from left or right. +/// search can start in two different directions, from left or right. static unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems, bool ZerosFromLeft, SelectionDAG &DAG) { @@ -5263,6 +5315,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // Break it into (shuffle shuffle_hi, shuffle_lo). 
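The sectioned mask checks above all follow the same shape: split the shuffle mask into 128-bit lanes and require the interleave pattern independently inside each lane, since AVX unpacks never cross a lane. A minimal standalone sketch of that test, using plain STL types (isUnpackLowMask is an illustrative name, and the V2-splat special case handled above is omitted):

    #include <vector>

    static bool isUndefOrEqual(int Val, int CmpVal) {
      return Val < 0 || Val == CmpVal;   // -1 encodes an undef mask element
    }

    bool isUnpackLowMask(const std::vector<int> &Mask, unsigned VectorBits) {
      unsigned NumElts = static_cast<unsigned>(Mask.size());
      unsigned NumSections = VectorBits / 128;
      if (NumSections == 0) NumSections = 1;        // MMX: one 64-bit section
      unsigned SectionElts = NumElts / NumSections;
      for (unsigned S = 0; S != NumSections; ++S) {
        unsigned Base = S * SectionElts;
        for (unsigned I = Base, J = Base; I != Base + SectionElts; I += 2, ++J) {
          // Even slots take element J of V1, odd slots element J of V2.
          if (!isUndefOrEqual(Mask[I], (int)J) ||
              !isUndefOrEqual(Mask[I + 1], (int)(J + NumElts)))
            return false;
        }
      }
      return true;
    }

For a 256-bit v8f32 this accepts <0,8,1,9> in the low lane and <4,12,5,13> in the high lane, which is exactly AVX's per-lane unpcklps behavior.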
Locs.clear(); + Locs.resize(4); SmallVector<int,8> LoMask(4U, -1); SmallVector<int,8> HiMask(4U, -1); @@ -5508,12 +5561,16 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT) { +static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v4f32: return X86ISD::UNPCKLPS; - case MVT::v2f64: return X86ISD::UNPCKLPD; + case MVT::v4f32: + return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS; + case MVT::v2f64: + return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + case MVT::v8f32: return X86ISD::VUNPCKLPSY; + case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; default: @@ -5641,7 +5698,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // unpckh_undef). Only use pshufd if speed is more important than size. if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); @@ -5762,7 +5819,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V1, V2, DAG); if (X86::isUNPCKHMask(SVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); @@ -5789,7 +5847,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); @@ -5812,8 +5871,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { - if (VT == MVT::v2f64) - return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); + if (VT == MVT::v2f64) { + X86ISD::NodeType Opcode = + getSubtarget()->hasAVX() ? 
X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG); + } if (VT == MVT::v2i64) return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); } @@ -5840,7 +5902,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (X86::isUNPCKL_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V1, V1, DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); @@ -7868,6 +7931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) && "This should be used only on Windows targets"); + assert(!Subtarget->isTargetEnvMacho()); DebugLoc dl = Op.getDebugLoc(); // Get the inputs. @@ -7878,8 +7942,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX); - Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); + Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -8809,8 +8874,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { case ISD::SADDO: // A subtract of one will be selected as a INC. Note that INC doesn't // set CF, so we can't do this for UADDO. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) - if (C->getAPIntValue() == 1) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) + if (C->isOne()) { BaseOp = X86ISD::INC; Cond = X86::COND_O; break; @@ -8825,8 +8890,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { case ISD::SSUBO: // A subtract of one will be selected as a DEC. Note that DEC doesn't // set CF, so we can't do this for USUBO. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) - if (C->getAPIntValue() == 1) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) + if (C->isOne()) { BaseOp = X86ISD::DEC; Cond = X86::COND_O; break; @@ -10351,21 +10416,48 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); + assert(!Subtarget->isTargetEnvMacho()); + // The lowering is pretty easy: we're just emitting the call to _alloca. The // non-trivial part is impdef of ESP. - // FIXME: The code should be tweaked as soon as we'll try to do codegen for - // mingw-w64. - const char *StackProbeSymbol = + if (Subtarget->isTargetWin64()) { + if (Subtarget->isTargetCygMing()) { + // ___chkstk(Mingw64): + // Clobbers R10, R11, RAX and EFLAGS. + // Updates RSP. + BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) + .addExternalSymbol("___chkstk") + .addReg(X86::RAX, RegState::Implicit) + .addReg(X86::RSP, RegState::Implicit) + .addReg(X86::RAX, RegState::Define | RegState::Implicit) + .addReg(X86::RSP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + } else { + // __chkstk(MSVCRT): does not update stack pointer. + // Clobbers R10, R11 and EFLAGS. + // FIXME: RAX(allocated size) might be reused and not killed. 
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) + .addExternalSymbol("__chkstk") + .addReg(X86::RAX, RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + // RAX has the offset to subtracted from RSP. + BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP) + .addReg(X86::RSP) + .addReg(X86::RAX); + } + } else { + const char *StackProbeSymbol = Subtarget->isTargetWindows() ? "_chkstk" : "_alloca"; - BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol(StackProbeSymbol) - .addReg(X86::EAX, RegState::Implicit) - .addReg(X86::ESP, RegState::Implicit) - .addReg(X86::EAX, RegState::Define | RegState::Implicit) - .addReg(X86::ESP, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol(StackProbeSymbol) + .addReg(X86::EAX, RegState::Implicit) + .addReg(X86::ESP, RegState::Implicit) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::ESP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + } MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -12126,7 +12218,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces.clear(); SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace. - // FIXME: this should verify that we are targetting a 486 or better. If not, + // FIXME: this should verify that we are targeting a 486 or better. If not, // we will turn this bswap into something that will be lowered to logical ops // instead of emitting the bswap asm. For now, we don't support 486 or lower // so don't worry about this. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6ec4a7d..6301057 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -677,9 +677,6 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; - /// getStackCookieLocation - Return true if the target stores stack /// protector cookies at a fixed offset in some non-standard address /// space, and populates the address space and offset as @@ -846,6 +843,12 @@ namespace llvm { virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; + + virtual EVT + getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, + ISD::NodeType ExtendKind) const; + virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index 45d1c6b..dd4f6a5 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -12,66 +12,91 @@ // //===----------------------------------------------------------------------===// -// FIXME: We don't support any intrinsics for these instructions yet. 
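The EmitLoweredWinAlloca hunk above distinguishes two Win64 probe flavors: MinGW-w64's ___chkstk both probes the pages and adjusts RSP, while MSVCRT's __chkstk only probes, so the lowering has to subtract RAX from RSP itself afterwards. A compact sketch of that dispatch (plain C++ with hypothetical helper names, not the actual MachineInstr-building code):

    #include <cstdio>

    struct ProbeLowering {
      const char *Symbol;   // external symbol to call
      bool NeedsManualSub;  // emit SUB RSP, RAX after the call?
    };

    // Mirrors the dispatch in the hunk above: MinGW-w64's ___chkstk updates
    // RSP itself, MSVCRT's __chkstk only probes, and 32-bit targets fall
    // back to _chkstk (Windows) or _alloca (CygMing).
    ProbeLowering pickStackProbe(bool IsWin64, bool IsCygMing, bool IsWindows) {
      if (IsWin64)
        return IsCygMing ? ProbeLowering{"___chkstk", false}
                         : ProbeLowering{"__chkstk", true};
      return ProbeLowering{IsWindows ? "_chkstk" : "_alloca", false};
    }

    int main() {
      ProbeLowering P = pickStackProbe(true, false, true);
      std::printf("call %s%s\n", P.Symbol,
                  P.NeedsManualSub ? "; sub rsp, rax" : "");
    }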
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat> + : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> { +} -class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> { +class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat> + : I3DNow<o, F, (outs VR64:$dst), ins, + !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>, + Has3DNow0F0FOpcode { + // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet. + let isAsmParserOnly = 1; + let Constraints = "$src1 = $dst"; } -class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic> - : I<o, F, (outs VR64:$dst), ins, - !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>, - TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode { +class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat> + : I3DNow<o, F, (outs VR64:$dst), ins, + !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>, + Has3DNow0F0FOpcode { // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet. let isAsmParserOnly = 1; } +multiclass I3DNow_binop_rm<bits<8> opc, string Mn> { + def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>; + def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>; +} + +multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> { + def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, + [(set VR64:$dst, (!cast<Intrinsic>( + !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>; + def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, + [(set VR64:$dst, (!cast<Intrinsic>( + !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, + (bitconvert (load_mmx addr:$src2))))]>; +} + +multiclass I3DNow_conv_rm<bits<8> opc, string Mn> { + def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>; + def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>; +} -let Constraints = "$src1 = $dst" in { - // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. - // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. 
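The _rm_int multiclasses above select the matching intrinsic purely by name, concatenating a version infix ("" for base 3DNow!, "a" for the 3DNowA additions) with the mnemonic before handing it to !cast<Intrinsic>. How the names come out, sketched outside TableGen for illustration:

    #include <cstdio>
    #include <string>

    // Sketch of the !strconcat logic in the multiclasses above; Ver is ""
    // for base 3DNow! instructions and "a" for the 3DNowA additions.
    std::string intrinsicName(const std::string &Mn, const std::string &Ver) {
      return "int_x86_3dnow" + Ver + "_" + Mn;
    }

    int main() {
      std::printf("%s\n", intrinsicName("pfadd", "").c_str());   // int_x86_3dnow_pfadd
      std::printf("%s\n", intrinsicName("pswapd", "a").c_str()); // int_x86_3dnowa_pswapd
    }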
- multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
- def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
- def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
- }
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
-defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
-defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
-defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
-defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
-defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
-defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
-defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
-defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
-defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
-defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
-defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
-defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
-defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
-defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr), "prefetch $addr", []>;
-
+
// FIXME: Disassembler gets a bogus decode conflict. 
-let isAsmParserOnly = 1 in { +let isAsmParserOnly = 1 in def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), "prefetchw $addr", []>; -} // "3DNowA" instructions -defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">; -defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">; -defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">; -defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">; -defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">; +defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; +defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">; +defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">; +defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">; +defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index f0ea068..9f7a4b0 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -163,7 +163,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), } // Defs = [EFLAGS] -// Suprisingly enough, these are not two address instructions! +// Surprisingly enough, these are not two address instructions! let Defs = [EFLAGS] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 77f4725..c228a0ae 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -263,6 +263,16 @@ let isCall = 1, isCodeGenOnly = 1 in Requires<[IsWin64]>; } +let isCall = 1, isCodeGenOnly = 1 in + // __chkstk(MSVC): clobber R10, R11 and EFLAGS. + // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP. + let Defs = [RAX, R10, R11, RSP, EFLAGS], + Uses = [RSP] in { + def W64ALLOCA : Ii32PCRel<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call{q}\t$dst", []>, + Requires<[IsWin64]>; + } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1 in diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 0660072..7daa264 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -91,21 +91,23 @@ class REX_W { bit hasREX_WPrefix = 1; } class LOCK { bit hasLockPrefix = 1; } class SegFS { bits<2> SegOvrBits = 1; } class SegGS { bits<2> SegOvrBits = 2; } -class TB { bits<4> Prefix = 1; } -class REP { bits<4> Prefix = 2; } -class D8 { bits<4> Prefix = 3; } -class D9 { bits<4> Prefix = 4; } -class DA { bits<4> Prefix = 5; } -class DB { bits<4> Prefix = 6; } -class DC { bits<4> Prefix = 7; } -class DD { bits<4> Prefix = 8; } -class DE { bits<4> Prefix = 9; } -class DF { bits<4> Prefix = 10; } -class XD { bits<4> Prefix = 11; } -class XS { bits<4> Prefix = 12; } -class T8 { bits<4> Prefix = 13; } -class TA { bits<4> Prefix = 14; } -class TF { bits<4> Prefix = 15; } +class TB { bits<5> Prefix = 1; } +class REP { bits<5> Prefix = 2; } +class D8 { bits<5> Prefix = 3; } +class D9 { bits<5> Prefix = 4; } +class DA { bits<5> Prefix = 5; } +class DB { bits<5> Prefix = 6; } +class DC { bits<5> Prefix = 7; } +class DD { bits<5> Prefix = 8; } +class DE { bits<5> Prefix = 9; } +class DF { bits<5> Prefix = 10; } +class XD { bits<5> Prefix = 11; } +class XS { bits<5> Prefix = 12; } +class T8 { bits<5> Prefix = 13; } +class TA { bits<5> Prefix = 14; } +class A6 { bits<5> Prefix = 15; } +class A7 { bits<5> Prefix = 16; } +class TF { bits<5> Prefix = 17; } class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit 
hasVEX_4VPrefix = 1; } @@ -136,7 +138,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix? bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix? - bits<4> Prefix = 0; // Which prefix byte does this inst have? + bits<5> Prefix = 0; // Which prefix byte does this inst have? bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix? FPFormat FPForm = NotFP; // What flavor of FP instruction is this? bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? @@ -154,20 +156,20 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{5-0} = FormBits; let TSFlags{6} = hasOpSizePrefix; let TSFlags{7} = hasAdSizePrefix; - let TSFlags{11-8} = Prefix; - let TSFlags{12} = hasREX_WPrefix; - let TSFlags{15-13} = ImmT.Value; - let TSFlags{18-16} = FPForm.Value; - let TSFlags{19} = hasLockPrefix; - let TSFlags{21-20} = SegOvrBits; - let TSFlags{23-22} = ExeDomain.Value; - let TSFlags{31-24} = Opcode; - let TSFlags{32} = hasVEXPrefix; - let TSFlags{33} = hasVEX_WPrefix; - let TSFlags{34} = hasVEX_4VPrefix; - let TSFlags{35} = hasVEX_i8ImmReg; - let TSFlags{36} = hasVEX_L; - let TSFlags{37} = has3DNow0F0FOpcode; + let TSFlags{12-8} = Prefix; + let TSFlags{13} = hasREX_WPrefix; + let TSFlags{16-14} = ImmT.Value; + let TSFlags{19-17} = FPForm.Value; + let TSFlags{20} = hasLockPrefix; + let TSFlags{22-21} = SegOvrBits; + let TSFlags{24-23} = ExeDomain.Value; + let TSFlags{32-25} = Opcode; + let TSFlags{33} = hasVEXPrefix; + let TSFlags{34} = hasVEX_WPrefix; + let TSFlags{35} = hasVEX_4VPrefix; + let TSFlags{36} = hasVEX_i8ImmReg; + let TSFlags{37} = hasVEX_L; + let TSFlags{38} = has3DNow0F0FOpcode; } class PseudoI<dag oops, dag iops, list<dag> pattern> @@ -319,7 +321,7 @@ class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX]>; class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, + : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB, Requires<[HasAVX]>; // SSE2 Instruction Templates: @@ -353,7 +355,7 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, + : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB, OpSize, Requires<[HasAVX]>; // SSE3 Instruction Templates: diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 5016c0f..3cbfac1 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -132,6 +132,8 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>; +def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>; def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 76a9b12..83f0260 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -232,7 +232,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?"); 
RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U); - // If this is not a reversable operation (because there is a many->one) + // If this is not a reversible operation (because there is a many->one) // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE) continue; @@ -335,7 +335,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?"); RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align); - // If this is not a reversable operation (because there is a many->one) + // If this is not a reversible operation (because there is a many->one) // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. if (OpTbl0[i][1] & TB_NOT_REVERSABLE) continue; @@ -460,7 +460,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries"); RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align); - // If this is not a reversable operation (because there is a many->one) + // If this is not a reversible operation (because there is a many->one) // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. if (OpTbl1[i][1] & TB_NOT_REVERSABLE) continue; @@ -682,7 +682,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!"); RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align); - // If this is not a reversable operation (because there is a many->one) + // If this is not a reversible operation (because there is a many->one) // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. if (OpTbl2[i][1] & TB_NOT_REVERSABLE) continue; @@ -916,7 +916,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVSDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MMX_MOVD64rm: @@ -2241,6 +2240,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, bool isTwoAddr = NumOps > 1 && MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + // FIXME: AsmPrinter doesn't know how to handle + // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. + if (MI->getOpcode() == X86::ADD32ri && + MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) + return NULL; + MachineInstr *NewMI = NULL; // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires @@ -2535,6 +2540,12 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, case X86::TEST32rr: case X86::TEST64rr: return true; + case X86::ADD32ri: + // FIXME: AsmPrinter doesn't know how to handle + // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. 
+ if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) + return false; + break; } } @@ -2845,11 +2856,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::FsMOVAPDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: - case X86::MOVDQUrm_Int: break; } switch (Opc2) { @@ -2869,11 +2878,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::FsMOVAPDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: - case X86::MOVDQUrm_Int: break; } @@ -3085,12 +3092,8 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } -bool X86InstrInfo:: -hasHighOperandLatency(const InstrItineraryData *ItinData, - const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - switch (DefMI->getOpcode()) { +bool X86InstrInfo::isHighLatencyDef(int opc) const { + switch (opc) { default: return false; case X86::DIVSDrm: case X86::DIVSDrm_Int: @@ -3120,6 +3123,14 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, } } +bool X86InstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + return isHighLatencyDef(DefMI->getOpcode()); +} + namespace { /// CGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for x86-32. diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index fcb5a25..8da68b5 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -33,15 +33,15 @@ namespace X86 { AddrScaleAmt = 1, AddrIndexReg = 2, AddrDisp = 3, - + /// AddrSegmentReg - The operand # of the segment in the memory operand. AddrSegmentReg = 4, /// AddrNumOperands - Total number of operands in a memory reference. AddrNumOperands = 5 }; - - + + // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. enum CondCode { @@ -72,16 +72,16 @@ namespace X86 { COND_INVALID }; - + // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); - + /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. CondCode GetOppositeBranchCondition(X86::CondCode CC); } - + /// X86II - This namespace holds all of the target specific flags that /// instruction info tracks. /// @@ -90,14 +90,14 @@ namespace X86II { enum TOF { //===------------------------------------------------------------------===// // X86 Specific MachineOperand flags. - + MO_NO_FLAG, - + /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a /// relocation of: /// SYMBOL_LABEL + [. - PICBASELABEL] MO_GOT_ABSOLUTE_ADDRESS, - + /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the /// immediate should get the value of the symbol minus the PIC base label: /// SYMBOL_LABEL - PICBASELABEL @@ -106,77 +106,77 @@ namespace X86II { /// MO_GOT - On a symbol operand this indicates that the immediate is the /// offset to the GOT entry for the symbol name from the base of the GOT. /// - /// See the X86-64 ELF ABI supplement for more details. + /// See the X86-64 ELF ABI supplement for more details. 
/// SYMBOL_LABEL @GOT MO_GOT, - + /// MO_GOTOFF - On a symbol operand this indicates that the immediate is - /// the offset to the location of the symbol name from the base of the GOT. + /// the offset to the location of the symbol name from the base of the GOT. /// - /// See the X86-64 ELF ABI supplement for more details. + /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTOFF MO_GOTOFF, - + /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is /// offset to the GOT entry for the symbol name from the current code - /// location. + /// location. /// - /// See the X86-64 ELF ABI supplement for more details. + /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTPCREL MO_GOTPCREL, - + /// MO_PLT - On a symbol operand this indicates that the immediate is - /// offset to the PLT entry of symbol name from the current code location. + /// offset to the PLT entry of symbol name from the current code location. /// - /// See the X86-64 ELF ABI supplement for more details. + /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @PLT MO_PLT, - + /// MO_TLSGD - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSGD MO_TLSGD, - + /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @GOTTPOFF MO_GOTTPOFF, - + /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @INDNTPOFF MO_INDNTPOFF, - + /// MO_TPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TPOFF MO_TPOFF, - + /// MO_NTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @NTPOFF MO_NTPOFF, - + /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the /// reference is actually to the "__imp_FOO" symbol. This is used for /// dllimport linkage on windows. MO_DLLIMPORT, - + /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the /// reference is actually to the "FOO$stub" symbol. This is used for calls /// and jumps to external functions on Tiger and earlier. MO_DARWIN_STUB, - + /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. @@ -186,19 +186,19 @@ namespace X86II { /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. 
MO_DARWIN_NONLAZY_PIC_BASE, - + /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", /// which is a PIC-base-relative reference to a hidden dyld lazy pointer /// stub. MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, - + /// MO_TLVP - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// This is the TLS offset for the Darwin TLS mechanism. MO_TLVP, - + /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate /// is some TLS offset from the picbase. /// @@ -239,7 +239,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) { return false; } } - + /// X86II - This namespace holds all of the target specific flags that /// instruction info tracks. /// @@ -299,7 +299,7 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, - + //// MRM_C1 - A mod/rm byte of exactly 0xC1. MRM_C1 = 33, MRM_C2 = 34, @@ -318,7 +318,7 @@ namespace X86II { /// immediates, the first of which is a 16-bit immediate (specified by /// the imm encoding) and the second is a 8-bit fixed value. RawFrmImm8 = 43, - + /// RawFrmImm16 - This is used for CALL FAR instructions, which have two /// immediates, the first of which is a 16 or 32-bit immediate (specified by /// the imm encoding) and the second is a 16-bit fixed value. In the AMD @@ -347,7 +347,7 @@ namespace X86II { // set, there is no prefix byte for obtaining a multibyte opcode. // Op0Shift = 8, - Op0Mask = 0xF << Op0Shift, + Op0Mask = 0x1F << Op0Shift, // TB - TwoByte - Set if this instruction has a two byte opcode, which // starts with a 0x0F byte before the real opcode. @@ -368,11 +368,12 @@ namespace X86II { // floating point operations performed in the SSE registers. XD = 11 << Op0Shift, XS = 12 << Op0Shift, - // T8, TA - Prefix after the 0x0F prefix. + // T8, TA, A6, A7 - Prefix after the 0x0F prefix. T8 = 13 << Op0Shift, TA = 14 << Op0Shift, - + A6 = 15 << Op0Shift, A7 = 16 << Op0Shift, + // TF - Prefix before and after 0x0F - TF = 15 << Op0Shift, + TF = 17 << Op0Shift, //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. @@ -380,13 +381,13 @@ namespace X86II { // etc. We only cares about REX.W and REX.R bits and only the former is // statically determined. // - REXShift = 12, + REXShift = Op0Shift + 5, REX_W = 1 << REXShift, //===------------------------------------------------------------------===// // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. - ImmShift = 13, + ImmShift = REXShift + 1, ImmMask = 7 << ImmShift, Imm8 = 1 << ImmShift, Imm8PCRel = 2 << ImmShift, @@ -400,7 +401,7 @@ namespace X86II { // FP Instruction Classification... Zero is non-fp instruction. // FPTypeMask - Mask for all of the FP types... - FPTypeShift = 16, + FPTypeShift = ImmShift + 3, FPTypeMask = 7 << FPTypeShift, // NotFP - The default, set for instructions that do not use FP registers. @@ -433,25 +434,26 @@ namespace X86II { SpecialFP = 7 << FPTypeShift, // Lock prefix - LOCKShift = 19, + LOCKShift = FPTypeShift + 3, LOCK = 1 << LOCKShift, // Segment override prefixes. Currently we just need ability to address // stuff in gs and fs segments. 
- SegOvrShift = 20, + SegOvrShift = LOCKShift + 1, SegOvrMask = 3 << SegOvrShift, FS = 1 << SegOvrShift, GS = 2 << SegOvrShift, - // Execution domain for SSE instructions in bits 22, 23. - // 0 in bits 22-23 means normal, non-SSE instruction. - SSEDomainShift = 22, + // Execution domain for SSE instructions in bits 23, 24. + // 0 in bits 23-24 means normal, non-SSE instruction. + SSEDomainShift = SegOvrShift + 2, - OpcodeShift = 24, - OpcodeMask = 0xFF << OpcodeShift, + OpcodeShift = SSEDomainShift + 2, + OpcodeMask = 0xFFULL << OpcodeShift, //===------------------------------------------------------------------===// /// VEX - The opcode prefix used by AVX instructions + VEXShift = OpcodeShift + 8, VEX = 1U << 0, /// VEX_W - Has a opcode specific functionality, but is used in the same @@ -473,7 +475,7 @@ namespace X86II { /// if a VR256 register is used, but some AVX instructions also have this /// field marked when using a f256 memory references. VEX_L = 1U << 4, - + /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction @@ -482,18 +484,18 @@ namespace X86II { /// this flag to indicate that the encoder should do the wacky 3DNow! thing. Has3DNow0F0FOpcode = 1U << 5 }; - + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified machine instruction. // static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { return TSFlags >> X86II::OpcodeShift; } - + static inline bool hasImm(uint64_t TSFlags) { return (TSFlags & X86II::ImmMask) != 0; } - + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. static inline unsigned getSizeOfImm(uint64_t TSFlags) { @@ -508,7 +510,7 @@ namespace X86II { case X86II::Imm64: return 8; } } - + /// isImmPCRel - Return true if the immediate of the specified instruction's /// TSFlags indicates that it is pc relative. static inline unsigned isImmPCRel(uint64_t TSFlags) { @@ -525,7 +527,7 @@ namespace X86II { return false; } } - + /// getMemoryOperandNo - The function returns the MCInst operand # for the /// first field of the memory operand. If the instruction doesn't have a /// memory operand, this returns -1. @@ -549,11 +551,11 @@ namespace X86II { case X86II::MRMDestMem: return 0; case X86II::MRMSrcMem: { - bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V; + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; unsigned FirstMemOp = 1; if (HasVEX_4V) ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). - + // FIXME: Maybe lea should have its own form? This is a horrible hack. //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || // Opcode == X86::LEA16r || Opcode == X86::LEA32r) @@ -613,7 +615,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) { class X86InstrInfo : public TargetInstrInfoImpl { X86TargetMachine &TM; const X86RegisterInfo RI; - + /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, /// RegOp2MemOpTable2 - Load / store folding opcode maps. /// @@ -621,7 +623,7 @@ class X86InstrInfo : public TargetInstrInfoImpl { DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0; DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1; DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2; - + /// MemOp2RegOpTable - Load / store unfolding opcode map. 
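With the Prefix field widened to five bits, every later field in TSFlags is now defined relative to its predecessor instead of by a hard-coded shift, so a future width change ripples through automatically. The resulting positions, checked with a plain enum whose values mirror the hunk above (a sketch, not LLVM's actual header):

    #include <cstdio>

    enum : unsigned {
      Op0Shift       = 8,
      REXShift       = Op0Shift + 5,        // 13
      ImmShift       = REXShift + 1,        // 14
      FPTypeShift    = ImmShift + 3,        // 17
      LOCKShift      = FPTypeShift + 3,     // 20
      SegOvrShift    = LOCKShift + 1,       // 21
      SSEDomainShift = SegOvrShift + 2,     // 23
      OpcodeShift    = SSEDomainShift + 2,  // 25
      VEXShift       = OpcodeShift + 8      // 33
    };

    int main() {
      // Matches "let TSFlags{32-25} = Opcode" in the hunk above.
      std::printf("Opcode occupies bits %u-%u\n", OpcodeShift, OpcodeShift + 7);
    }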
/// DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable; @@ -795,7 +797,7 @@ public: virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex = 0) const; - + /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler /// to determine if two loads are loading from the same base address. It /// should only return true if the base pointers are the same and the @@ -805,7 +807,7 @@ public: int64_t &Offset1, int64_t &Offset2) const; /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to - /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. This function takes two integers that represent the load offsets @@ -829,7 +831,7 @@ public: return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } - + static bool isX86_64ExtendedReg(const MachineOperand &MO) { if (!MO.isReg()) return false; return isX86_64ExtendedReg(MO.getReg()); @@ -858,11 +860,13 @@ public: const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Alignment) const; + bool isHighLatencyDef(int opc) const; + bool hasHighOperandLatency(const InstrItineraryData *ItinData, const MachineRegisterInfo *MRI, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const; - + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f832a7c..03a0b0c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -459,7 +459,7 @@ def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; include "X86InstrFormats.td" //===----------------------------------------------------------------------===// -// Pattern fragments... +// Pattern fragments. // // X86 specific condition code. These correspond to CondCode in @@ -481,21 +481,21 @@ def X86_COND_O : PatLeaf<(i8 13)>; def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE def X86_COND_S : PatLeaf<(i8 15)>; -def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>; +let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs. + def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>; + def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>; + def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>; +} -def i16immSExt8 : PatLeaf<(i16 immSext8)>; -def i32immSExt8 : PatLeaf<(i32 immSext8)>; -def i64immSExt8 : PatLeaf<(i64 immSext8)>; -def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>; -def i64immZExt32 : PatLeaf<(i64 imm), [{ - // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit - // unsignedsign extended field. - return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue(); -}]>; +def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>; + + +// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit +// unsigned field. 
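The new ImmLeaf predicates above test the immediate's value directly rather than pattern-matching a node, which is what lets FastIsel reason about them (or, for the simm8 forms, deliberately ignore them). Each one reduces to a one-line integer check; written out as plain functions for illustration (names are not LLVM API):

    #include <cassert>
    #include <cstdint>

    bool isSExt8(int64_t Imm)  { return Imm == (int8_t)Imm; }
    bool isSExt32(int64_t Imm) { return Imm == (int32_t)Imm; }
    bool isZExt32(int64_t Imm) { return (uint64_t)Imm == (uint32_t)Imm; }

    int main() {
      assert(isSExt8(-128) && !isSExt8(128));  // int8 range is [-128, 127]
      assert(isSExt32(-1) && !isZExt32(-1));   // -1 sign-extends but does not zero-extend
      assert(isZExt32(0xFFFFFFFFLL));          // fits in a 32-bit unsigned field
    }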
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>; -def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{ - uint64_t v = N->getZExtValue(); - return v == (uint32_t)v && (int32_t)v == (int8_t)v; +def i64immZExt32SExt8 : ImmLeaf<i64, [{ + return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm; }]>; // Helper fragments for loads. @@ -1437,7 +1437,7 @@ def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>; // Various unary fpstack operations default to operating on on ST1. // For example, "fxch" -> "fxch %st(1)" -def : InstAlias<"faddp", (ADD_FPrST0 ST1)>; +def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>; def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>; def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>; def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>; @@ -1455,13 +1455,15 @@ def : InstAlias<"fucompi", (UCOM_FIPr ST1)>; // For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate // instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with // gas. -multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> { - def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), (Inst RST:$op)>; - def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>; +multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> { + def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), + (Inst RST:$op), EmitAlias>; + def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), + (Inst ST0), EmitAlias>; } defm : FpUnaryAlias<"fadd", ADD_FST0r>; -defm : FpUnaryAlias<"faddp", ADD_FPrST0>; +defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>; defm : FpUnaryAlias<"fsub", SUB_FST0r>; defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>; defm : FpUnaryAlias<"fsubr", SUBR_FST0r>; @@ -1472,8 +1474,8 @@ defm : FpUnaryAlias<"fdiv", DIV_FST0r>; defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>; defm : FpUnaryAlias<"fdivr", DIVR_FST0r>; defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>; -defm : FpUnaryAlias<"fcomi", COM_FIr>; -defm : FpUnaryAlias<"fucomi", UCOM_FIr>; +defm : FpUnaryAlias<"fcomi", COM_FIr, 0>; +defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>; defm : FpUnaryAlias<"fcompi", COM_FIPr>; defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; @@ -1481,7 +1483,7 @@ defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; // Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they // commute. We also allow fdiv[r]p/fsubrp even though they don't commute, // solely because gas supports it. -def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>; +def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>; def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>; def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>; def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>; @@ -1534,29 +1536,31 @@ def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>; def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>; // Match 'movq GR64, MMX' as an alias for movd. -def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>; -def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>; +def : InstAlias<"movq $src, $dst", + (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>; +def : InstAlias<"movq $src, $dst", + (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>; // movsd with no operands (as opposed to the SSE scalar move of a double) is an // alias for movsl. 
(as in rep; movsd) def : InstAlias<"movsd", (MOVSD)>; // movsx aliases -def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>; // movzx aliases -def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>; // Note: No GR32->GR64 movzx form. // outb %dx -> outb %al, %dx diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b912949..cde3f6b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -135,18 +135,16 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, // is used instead. Register-to-register movss/movsd is not modeled as an // INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable // in terms of a copy, and just mentioned, we don't use movss/movsd for copies. 
-let isAsmParserOnly = 1 in { - def VMOVSSrr : sse12_move_rr<FR32, v4f32, - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V; - def VMOVSDrr : sse12_move_rr<FR64, v2f64, - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V; +def VMOVSSrr : sse12_move_rr<FR32, v4f32, + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V; +def VMOVSDrr : sse12_move_rr<FR64, v2f64, + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V; - let canFoldAsLoad = 1, isReMaterializable = 1 in { - def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX; +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX; - let AddedComplexity = 20 in - def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX; - } + let AddedComplexity = 20 in + def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX; } let Constraints = "$src1 = $dst" in { @@ -218,14 +216,12 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; -let isAsmParserOnly = 1 in { def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>, XS, VEX; def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>, XD, VEX; -} // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), @@ -251,7 +247,6 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in [(set RC:$dst, (ld_frag addr:$src))], d>; } -let isAsmParserOnly = 1 in { defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle>, VEX; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, @@ -269,7 +264,6 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle>, VEX; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, 0>, OpSize, VEX; -} defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle>, TB; defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, @@ -279,7 +273,6 @@ defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, 0>, TB, OpSize; -let isAsmParserOnly = 1 in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX; @@ -304,7 +297,6 @@ def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; -} def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), @@ -328,32 +320,14 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v2f64 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS/D load and store -let isAsmParserOnly = 1 in { - let canFoldAsLoad = 1, isReMaterializable = 1 in - def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "movups\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX; - def 
VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX; - def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movups\t{$src, $dst|$dst, $src}", - [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; - def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; -} -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movups\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; -def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>; +def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movups\t{$src, $dst|$dst, $src}", + [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; +def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", @@ -382,7 +356,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, SSEPackedDouble>, TB, OpSize; } -let isAsmParserOnly = 1, AddedComplexity = 20 in { +let AddedComplexity = 20 in { defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", @@ -395,7 +369,6 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "\t{$src2, $dst|$dst, $src2}">; } -let isAsmParserOnly = 1 in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -404,7 +377,6 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)]>, VEX; -} def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -416,7 +388,6 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. 
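The pattern in the hunks above repeats through most of this file: blocks wrapped in let isAsmParserOnly = 1 are being unwrapped, while the other bound properties (AddedComplexity, canFoldAsLoad, isReMaterializable) are kept. The bit hides a definition from everything except the generated assembly matcher, so dropping it makes the VEX-encoded V* forms visible to instruction selection and the disassembler as well. A self-contained sketch of the change, with a stand-in class in place of the real Instruction superclass:

  class InstSketch<string asm> {
    string AsmString = asm;
    bit isAsmParserOnly = 0;  // 1 = only the asm matcher sees this def
    int AddedComplexity = 0;  // higher = this pattern is tried earlier
  }
  // Old shape: parser-only.
  let isAsmParserOnly = 1, AddedComplexity = 20 in
  def OldVMOVL : InstSketch<"movlps $src2, $src1, $dst">;
  // New shape: same selection bias, ordinary fully visible def.
  let AddedComplexity = 20 in
  def NewVMOVL : InstSketch<"movlps $src2, $src1, $dst">;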
-let isAsmParserOnly = 1 in { def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract @@ -429,7 +400,6 @@ def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), (v2f64 (unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst)]>, VEX; -} def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract @@ -441,7 +411,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), (v2f64 (unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst)]>; -let isAsmParserOnly = 1, AddedComplexity = 20 in { +let AddedComplexity = 20 in { def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -516,7 +486,6 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; } -let isAsmParserOnly = 1 in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, @@ -542,7 +511,6 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, VEX_4V; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W; -} defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS; @@ -591,27 +559,25 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } -let isAsmParserOnly = 1 in { - defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si">, XS, VEX; - defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">, - XS, VEX, VEX_W; - defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si">, XD, VEX; - defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">, - XD, VEX, VEX_W; - - // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_ - // Get rid of this hack or rename the intrinsics, there are several - // intructions that only match with the intrinsic form, why create duplicates - // to let them be recognized by the assembler? 
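The FIXME retained above documents a real matcher quirk: the generated assembly matcher skips any record whose name contains _Int or Int_ (those records exist to carry intrinsic selection patterns), so pattern-free *_alt duplicates have to be defined just to make the syntax parseable. An illustrative sketch, all names hypothetical:

  class CvtSketch<string asm> { string AsmString = asm; }
  // Carries the intrinsic pattern; skipped by the asm matcher by name.
  def Int_CvtSd2Si : CvtSketch<"cvtsd2si $src, $dst">;
  // Duplicate whose only job is to be matchable as assembly.
  def CvtSd2Si_alt : CvtSketch<"cvtsd2si $src, $dst">;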
- defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; - defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W; -} +defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + f32mem, load, "cvtss2si">, XS, VEX; +defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, + int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">, + XS, VEX, VEX_W; +defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, + f128mem, load, "cvtsd2si">, XD, VEX; +defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, + int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">, + XD, VEX, VEX_W; + +// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_ +// Get rid of this hack or rename the intrinsics, there are several +// intructions that only match with the intrinsic form, why create duplicates +// to let them be recognized by the assembler? +defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; +defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W; defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, f32mem, load, "cvtss2si">, XS; defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, @@ -622,18 +588,16 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si{q}">, XD, REX_W; -let isAsmParserOnly = 1 in { - defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V; - defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V, - VEX_W; - defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V; - defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD, - VEX_4V, VEX_W; -} +defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V; +defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V, + VEX_W; +defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V; +defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD, + VEX_4V, VEX_W; let Constraints = "$src1 = $dst" in { defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, @@ -653,7 +617,6 @@ let Constraints = "$src1 = $dst" in { /// SSE 1 Only // Aliases for intrinsics -let isAsmParserOnly = 1 in { defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, f32mem, load, "cvttss2si">, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, @@ -664,7 +627,6 @@ defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, "cvttsd2si">, XD, VEX, VEX_W; -} defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, f32mem, load, "cvttss2si">, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, @@ -676,7 +638,7 @@ defm Int_CVTTSD2SI64 : 
sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, "cvttsd2si{q}">, XD, REX_W; -let isAsmParserOnly = 1, Pattern = []<dag> in { +let Pattern = []<dag> in { defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX; defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, @@ -702,7 +664,6 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, /// SSE 2 Only // Convert scalar double to scalar single -let isAsmParserOnly = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, @@ -711,7 +672,6 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V; -} def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, Requires<[HasAVX]>; @@ -723,7 +683,6 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD, Requires<[HasSSE2, OptForSize]>; -let isAsmParserOnly = 1 in defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>, XS, VEX_4V; @@ -732,7 +691,7 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS; // Convert scalar single to scalar double -let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix +// SSE2 instructions with XS prefix def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -741,7 +700,6 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; -} def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; @@ -754,7 +712,6 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, Requires<[HasSSE2, OptForSize]>; -let isAsmParserOnly = 1 in { def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -767,7 +724,6 @@ def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, (load addr:$src2)))]>, XS, VEX_4V, Requires<[HasAVX]>; -} let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -788,7 +744,7 @@ def : Pat<(extloadf32 addr:$src), Requires<[HasSSE2, OptForSpeed]>; // Convert doubleword to packed single/double fp -let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix +// SSE2 instructions without OpSize prefix def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, @@ -798,7 +754,6 @@ def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtdq2ps (bitconvert (memopv2i64 addr:$src))))]>, TB, VEX, Requires<[HasAVX]>; -} def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2ps 
VR128:$src))]>, @@ -810,7 +765,7 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), TB, Requires<[HasSSE2]>; // FIXME: why the non-intrinsic version is described as SSE3? -let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix +// SSE2 instructions with XS prefix def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, @@ -820,7 +775,6 @@ def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtdq2pd (bitconvert (memopv2i64 addr:$src))))]>, XS, VEX, Requires<[HasAVX]>; -} def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, @@ -833,7 +787,6 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), // Convert packed single/double fp to doubleword -let isAsmParserOnly = 1 in { def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -842,13 +795,11 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; -} def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; -let isAsmParserOnly = 1 in { def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>, @@ -858,7 +809,6 @@ def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memop addr:$src)))]>, VEX; -} def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>; @@ -867,7 +817,7 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memop addr:$src)))]>; -let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix +// SSE2 packed instructions with XD prefix def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, @@ -877,7 +827,6 @@ def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtpd2dq (memop addr:$src)))]>, XD, VEX, Requires<[HasAVX]>; -} def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, @@ -890,7 +839,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // Convert with truncation packed single/double fp to doubleword -let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix +// SSE2 packed instructions with XS prefix def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; 
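A few hunks up, the surviving wrapper is let Pattern = []<dag>, the usual idiom for assembler-only variants: it overrides the record's selection patterns with an empty dag list, so the definition can still be parsed, printed, and encoded but never participates in codegen. Sketch with a stand-in class:

  class InstPatSketch<string asm> {
    string AsmString = asm;
    list<dag> Pattern = [];  // selection patterns; empty = never selected
  }
  // Assembler-only form: the syntax exists, codegen ignores it.
  let Pattern = []<dag> in
  def CvtSs2SiAsmOnly : InstPatSketch<"cvtss2si $src, $dst">;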
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -899,7 +848,6 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; -} def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -910,7 +858,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; -let isAsmParserOnly = 1 in { def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -921,9 +868,7 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memop addr:$src)))]>, XS, VEX, Requires<[HasAVX]>; -} -let isAsmParserOnly = 1 in { def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", @@ -934,7 +879,6 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>, VEX; -} def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; @@ -943,7 +887,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>; -let isAsmParserOnly = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
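The y-suffixed conversions defined in the next hunk (cvttpd2dqy here, cvtpd2psy further down, presumably alongside x-suffixed 128-bit siblings elided from this excerpt) are the fix that comment describes: a register source reveals its width through xmm versus ymm, but a memory operand does not, so the width moves into the mnemonic itself (vcvtpd2psy (%rax), %xmm0 reads 256 bits, for example). Sketch of the pairing, names hypothetical:

  class WidthSketch<string asm> { string AsmString = asm; }
  // Register form: the operand class disambiguates the width.
  def CvtRR : WidthSketch<"cvttpd2dq $src, $dst">;
  // Memory forms: the width has to be spelled out in the mnemonic.
  def CvtRM128 : WidthSketch<"cvttpd2dqx $src, $dst">;
  def CvtRM256 : WidthSketch<"cvttpd2dqy $src, $dst">;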
@@ -963,10 +906,9 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; -} // Convert packed single to packed double -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; @@ -982,7 +924,6 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; -let isAsmParserOnly = 1 in { def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, @@ -992,7 +933,6 @@ def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtps2pd (load addr:$src)))]>, VEX, Requires<[HasAVX]>; -} def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, @@ -1004,7 +944,6 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), TB, Requires<[HasSSE2]>; // Convert packed double to packed single -let isAsmParserOnly = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. @@ -1024,14 +963,12 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; -} def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; -let isAsmParserOnly = 1 in { def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; @@ -1040,7 +977,6 @@ def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memop addr:$src)))]>; -} def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; @@ -1089,26 +1025,27 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)), // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, string asm, string asm_alt> { - def rr : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), - asm, []>; - let mayLoad = 1 in - def rm : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), - asm, []>; - // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1 in { - def rr_alt : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), - asm_alt, []>; + def rr : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), + asm, []>; let mayLoad = 1 in - def rm_alt : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), - asm_alt, []>; + def rm : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), + asm, []>; } + + // Accept explicit immediate argument form instead of comparison code. + def rr_alt : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, []>; + let mayLoad = 1 in + def rm_alt : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), + asm_alt, []>; } -let neverHasSideEffects = 1, isAsmParserOnly = 1 in { +let neverHasSideEffects = 1 in { defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">, @@ -1141,14 +1078,12 @@ multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop, } // Aliases to match intrinsics which expect XMM operand(s). -let isAsmParserOnly = 1 in { - defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, - "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">, - XS, VEX_4V; - defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, - "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">, - XD, VEX_4V; -} +defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, + "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">, + XS, VEX_4V; +defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, + "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">, + XD, VEX_4V; let Constraints = "$src1 = $dst" in { defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS; @@ -1171,28 +1106,26 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, } let Defs = [EFLAGS] in { - let isAsmParserOnly = 1 in { - defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, VEX; - defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, OpSize, VEX; - let Pattern = []<dag> in { - defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, - "comiss", SSEPackedSingle>, VEX; - defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd", SSEPackedDouble>, OpSize, VEX; - } - - defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss", SSEPackedSingle>, VEX; - defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd", SSEPackedDouble>, OpSize, VEX; - - defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, - load, "comiss", SSEPackedSingle>, VEX; - defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd", SSEPackedDouble>, OpSize, VEX; + defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, VEX; + defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, OpSize, VEX; + let Pattern = []<dag> in { + defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, VEX; + defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", 
SSEPackedDouble>, OpSize, VEX; } + + defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, VEX; + defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, OpSize, VEX; + + defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, + load, "comiss", SSEPackedSingle>, VEX; + defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, + load, "comisd", SSEPackedDouble>, OpSize, VEX; defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss", SSEPackedSingle>, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, @@ -1220,41 +1153,40 @@ let Defs = [EFLAGS] in { multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int, string asm, string asm_alt, Domain d> { - def rri : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; - def rmi : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; - // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1 in { - def rri_alt : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), - asm_alt, [], d>; - def rmi_alt : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), - asm_alt, [], d>; + def rri : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + def rmi : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; } -} -let isAsmParserOnly = 1 in { - defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, - "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedSingle>, VEX_4V; - defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, - "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256, - "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedSingle>, VEX_4V; - defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256, - "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; -} + // Accept explicit immediate argument form instead of comparison code. 
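Note what the restructuring of sse12_cmp_scalar and sse12_cmp_packed inverts: the cmp${cc} records, where the SSECC operand folds the condition into pseudo-mnemonics such as cmpeqss or cmpltps, are now the isAsmParserOnly ones, while the form taking the condition as an explicit i8imm becomes the canonical record. Both spellings assemble to the same encoding. Sketch, with a stand-in class:

  class CmpSketch<string asm> {
    string AsmString = asm;
    bit isAsmParserOnly = 0;
  }
  // Condition folded into the mnemonic; accepted, no longer canonical.
  let isAsmParserOnly = 1 in
  def CmpCCForm : CmpSketch<"cmp${cc}ss $src, $dst">;
  // Canonical form: condition code as an explicit immediate operand.
  def CmpImmForm : CmpSketch<"cmpss $src2, $src, $dst">;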
+ def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, [], d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), + asm_alt, [], d>; +} + +defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; +defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; +defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; +defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, "cmp${cc}ps\t{$src, $dst|$dst, $src}", @@ -1294,20 +1226,18 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>; } -let isAsmParserOnly = 1 in { - defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, - "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - memopv4f32, SSEPackedSingle>, VEX_4V; - defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, - "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - memopv8f32, SSEPackedSingle>, VEX_4V; - defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, - "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", - memopv2f64, SSEPackedDouble>, OpSize, VEX_4V; - defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, - "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", - memopv4f64, SSEPackedDouble>, OpSize, VEX_4V; -} +defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, + "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + memopv4f32, SSEPackedSingle>, TB, VEX_4V; +defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, + "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + memopv8f32, SSEPackedSingle>, TB, VEX_4V; +defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, + "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", + memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V; +defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, + "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", + memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -1340,33 +1270,31 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt, } let AddedComplexity = 10 in { - let isAsmParserOnly = 1 in { - defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, - VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, - VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, - VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, 
VEX_4V; - defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, - VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - - defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, - VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, - VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, - VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, - VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - } + defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + + defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, + VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, + VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, + VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, + VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, @@ -1404,30 +1332,28 @@ defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, TB, OpSize; -let isAsmParserOnly = 1 in { - defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, - "movmskps", SSEPackedSingle>, VEX; - defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, - "movmskpd", SSEPackedDouble>, OpSize, - VEX; - defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, - "movmskps", SSEPackedSingle>, VEX; - defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, - "movmskpd", SSEPackedDouble>, OpSize, - VEX; +defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, + "movmskps", SSEPackedSingle>, VEX; +defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, + "movmskpd", SSEPackedDouble>, OpSize, + VEX; +defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, + "movmskps", SSEPackedSingle>, VEX; +defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, 
int_x86_avx_movmsk_pd_256, + "movmskpd", SSEPackedDouble>, OpSize, + VEX; - // Assembler Only - def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; - def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; - def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; - def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; -} +// Assembler Only +def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; +def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, + VEX; +def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; +def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, + VEX; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions @@ -1482,13 +1408,11 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), /// multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, SDNode OpNode> { - let isAsmParserOnly = 1 in { - defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, - FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V; + defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, + FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V; - defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, - FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V; - } + defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, + FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32, @@ -1514,7 +1438,7 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, SDNode OpNode, int HasPat = 0, list<list<dag>> Pattern = []> { - let isAsmParserOnly = 1, Pattern = []<dag> in { + let Pattern = []<dag> in { defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, !if(HasPat, Pattern[0], // rr @@ -1561,7 +1485,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms /// -let isAsmParserOnly = 1 in { multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> { defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V; @@ -1569,7 +1492,6 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> { defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V; } -} // AVX 256-bit packed logical ops forms defm VAND : sse12_fp_packed_logical_y<0x54, "and">; @@ -1667,38 +1589,36 @@ multiclass 
basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> { } // Binary Arithmetic instructions -let isAsmParserOnly = 1 in { - defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, - basic_sse12_fp_binop_s_int<0x58, "add", 0>, - basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, - basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; - defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, - basic_sse12_fp_binop_s_int<0x59, "mul", 0>, - basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, - basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; +defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_s_int<0x58, "add", 0>, + basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; +defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_s_int<0x59, "mul", 0>, + basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; - let isCommutable = 0 in { - defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, - basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, - basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, - basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; - defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, - basic_sse12_fp_binop_s_int<0x5E, "div", 0>, - basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, - basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; - defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, - basic_sse12_fp_binop_s_int<0x5F, "max", 0>, - basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, - basic_sse12_fp_binop_p_int<0x5F, "max", 0>, - basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, - basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V; - defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, - basic_sse12_fp_binop_s_int<0x5D, "min", 0>, - basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, - basic_sse12_fp_binop_p_int<0x5D, "min", 0>, - basic_sse12_fp_binop_p_y_int<0x5D, "min">, - basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; - } +let isCommutable = 0 in { + defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, + basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; + defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_s_int<0x5E, "div", 0>, + basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; + defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_s_int<0x5F, "max", 0>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_p_int<0x5F, "max", 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V; + defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_s_int<0x5D, "min", 0>, + basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p_int<0x5D, "min", 0>, + basic_sse12_fp_binop_p_y_int<0x5D, "min">, + basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; } let Constraints = "$src1 = $dst" in { @@ -1899,7 +1819,7 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // Square root. 
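defm VADD and friends inherit from up to six multiclasses in one statement; each multiclass contributes its own family of records, all sharing the VADD name prefix and the trailing VEX_4V mixin. A minimal self-contained sketch of defm over several multiclasses (OpSketch and the multiclass names are illustrative):

  class OpSketch<string asm> { string AsmString = asm; }
  multiclass ScalarForms<string op> {
    def SSrr : OpSketch<!strconcat(op, "ss $src, $dst")>;
  }
  multiclass PackedForms<string op> {
    def PSrr : OpSketch<!strconcat(op, "ps $src, $dst")>;
  }
  // Produces VADDSSrr and VADDPSrr in a single statement.
  defm VADD : ScalarForms<"vadd">, PackedForms<"vadd">;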
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>, sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>, @@ -1955,67 +1875,65 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, // SSE 1 & 2 - Non-temporal stores //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in { - def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX; - def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX; +def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX; +def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX; - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs), +let ExeDomain = SSEPackedInt in + def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX; + +let AddedComplexity = 400 in { // Prefer non-temporal versions + def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)]>, VEX; + def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX; - - let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)]>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)]>, VEX; - def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)]>, VEX; - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)]>, VEX; + def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), + [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>, VEX; - - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)]>, VEX; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)]>, VEX; - def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)]>, VEX; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : 
VPDI<0xE7, MRMDestMem, (outs), + let ExeDomain = SSEPackedInt in + def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)]>, VEX; + + def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)]>, VEX; + def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)]>, VEX; + def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), + [(alignednontemporalstore (v4f64 VR256:$src), addr:$dst)]>, VEX; - } + let ExeDomain = SSEPackedInt in + def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)]>, VEX; } def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src), @@ -2138,12 +2056,10 @@ def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), // SSE 1 & 2 - Load/Store XCSR register //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in { - def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), - "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX; - def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), - "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX; -} +def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), + "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX; +def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), + "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX; def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>; @@ -2156,45 +2072,43 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let isAsmParserOnly = 1 in { - let neverHasSideEffects = 1 in { - def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - } - def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; - def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; - - let canFoldAsLoad = 1, mayLoad = 1 in { - def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - let Predicates = [HasAVX] in { - def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; - def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; - } - } +let neverHasSideEffects = 1 in { +def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +} +def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; +def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; - let mayStore = 1 in { - def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), - (ins i256mem:$dst, VR256:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - let Predicates = [HasAVX] in { - def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), +let canFoldAsLoad = 1, mayLoad = 1 in { +def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +let Predicates = [HasAVX] in { + def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; - def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), + def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; - } - } +} +} + +let mayStore = 1 in { +def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i256mem:$dst, VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +let Predicates = [HasAVX] in { +def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; +def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; +} } let neverHasSideEffects = 1 in @@ -2226,23 +2140,11 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), } // Intrinsic forms of MOVDQU load and store -let isAsmParserOnly = 1 in { -let canFoldAsLoad = 1 in -def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>, - XS, VEX, Requires<[HasAVX]>; def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "vmovdqu\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, XS, VEX, Requires<[HasAVX]>; -} -let canFoldAsLoad = 1 in -def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movdqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>, - XS, Requires<[HasSSE2]>; def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, @@ -2347,7 +2249,7 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, // 128-bit Integer Arithmetic -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V; defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V; defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V; @@ -2437,7 +2339,7 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; // SSE2 - Packed Integer Logical Instructions 
//===---------------------------------------------------------------------===// -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>, VEX_4V; @@ -2584,7 +2486,7 @@ let Predicates = [HasSSE2] in { // SSE2 - Packed Integer Comparison Instructions //===---------------------------------------------------------------------===// -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1, 0>, VEX_4V; defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1, @@ -2638,7 +2540,7 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), // SSE2 - Packed Integer Pack Instructions //===---------------------------------------------------------------------===// -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, 0, 0>, VEX_4V; defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, @@ -2676,7 +2578,7 @@ def mi : Ii8<0x70, MRMSrcMem, } } // ExeDomain = SSEPackedInt -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { let AddedComplexity = 5 in defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize, VEX; @@ -2724,7 +2626,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, addr:$src2))))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8, 0>, VEX_4V; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16, @@ -2834,7 +2736,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { } // Extract -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -2847,7 +2749,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, imm:$src2))]>; // Insert -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V; def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), @@ -2866,13 +2768,11 @@ let Constraints = "$src1 = $dst" in let ExeDomain = SSEPackedInt in { -let isAsmParserOnly = 1 in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; -} def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; @@ -2885,7 +2785,6 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), let ExeDomain = SSEPackedInt in { -let isAsmParserOnly = 1 in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), @@ -2896,7 +2795,6 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX; -} let Uses = [EDI] in def MASKMOVDQU : 
PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), @@ -2914,7 +2812,6 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), //===---------------------------------------------------------------------===// // Move Int Doubleword to Packed Double Int -let isAsmParserOnly = 1 in { def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2924,7 +2821,6 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, VEX; -} def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2943,7 +2839,6 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), // Move Int Doubleword to Single Scalar -let isAsmParserOnly = 1 in { def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; @@ -2952,7 +2847,6 @@ def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, VEX; -} def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>; @@ -2962,7 +2856,6 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; // Move Packed Doubleword Int to Packed Double Int -let isAsmParserOnly = 1 in { def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2972,7 +2865,6 @@ def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>, VEX; -} def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2998,14 +2890,12 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; // Move Scalar Single to Double Int -let isAsmParserOnly = 1 in { def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX; def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX; -} def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>; @@ -3014,7 +2904,7 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; // movd / movq to XMM register zero-extends -let AddedComplexity = 15, isAsmParserOnly = 1 in { +let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl @@ -3038,7 +2928,6 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), } let AddedComplexity = 20 in { -let isAsmParserOnly = 1 in def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, 
$src}", [(set VR128:$dst, @@ -3064,7 +2953,6 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), //===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int -let isAsmParserOnly = 1 in def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3077,7 +2965,6 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix // Move Packed Quadword Int to Quadword Int -let isAsmParserOnly = 1 in def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -3091,7 +2978,6 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; // Store / copy lower 64-bits of a XMM register. -let isAsmParserOnly = 1 in def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; @@ -3099,7 +2985,7 @@ def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; -let AddedComplexity = 20, isAsmParserOnly = 1 in +let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3124,7 +3010,7 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. 
-let isAsmParserOnly = 1, AddedComplexity = 15 in +let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, @@ -3135,7 +3021,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, XS, Requires<[HasSSE2]>; -let AddedComplexity = 20, isAsmParserOnly = 1 in +let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl @@ -3153,7 +3039,6 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), } // Instructions to match in the assembler -let isAsmParserOnly = 1 in { def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), @@ -3161,13 +3046,12 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), // Recognize "movd" with GR64 destination, but encode as a "movq" def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; -} // Instructions for the disassembler // xr = XMM register // xm = mem64 -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -3209,7 +3093,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, //===---------------------------------------------------------------------===// // Convert Packed Double FP to Packed DW Integers -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
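For reference, the zero-extension behavior the comment above describes is exactly what the corresponding SSE2 intrinsic exposes; a minimal user-level sketch, not part of the patch (the function name is illustrative):

#include <emmintrin.h>

// movq between XMM registers copies the low quadword and clears
// bits 127:64 of the destination, despite what the IA32 manual of
// the time suggested.
__m128i keep_low64(__m128i v) {
  return _mm_move_epi64(v);   // compiles to movq xmm, xmm
}
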
@@ -3237,7 +3121,7 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; // Convert Packed DW Integers to Packed Double FP -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -3288,7 +3172,7 @@ def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // FIXME: Merge above classes when we have patterns for the ymm version defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; @@ -3319,7 +3203,7 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), []>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // FIXME: Merge above classes when we have patterns for the ymm version defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; @@ -3327,7 +3211,7 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in { defm MOVDDUP : sse3_replicate_dfp<"movddup">; // Move Unaligned Integer -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; @@ -3391,21 +3275,21 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX], +let Predicates = [HasAVX], ExeDomain = SSEPackedDouble in { defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, - f128mem, 0>, XD, VEX_4V; + f128mem, 0>, TB, XD, VEX_4V; defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, - f128mem, 0>, OpSize, VEX_4V; + f128mem, 0>, TB, OpSize, VEX_4V; defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, - f256mem, 0>, XD, VEX_4V; + f256mem, 0>, TB, XD, VEX_4V; defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, - f256mem, 0>, OpSize, VEX_4V; + f256mem, 0>, TB, OpSize, VEX_4V; } let Constraints = "$src1 = $dst", Predicates = [HasSSE3], ExeDomain = SSEPackedDouble in { defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, - f128mem>, XD; + f128mem>, TB, XD; defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, f128mem>, TB, OpSize; } @@ -3444,7 +3328,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, int_x86_sse3_hadd_ps, 0>, VEX_4V; defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, @@ -3496,7 +3380,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (mem_frag128 addr:$src))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, int_x86_ssse3_pabs_b_128>, VEX; defm VPABSW : SS3I_unop_rm_int<0x1D, 
"vpabsw", memopv8i16, @@ -3538,7 +3422,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { let isCommutable = 0 in { defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, int_x86_ssse3_phadd_w_128, 0>, VEX_4V; @@ -3630,7 +3514,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { []>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PALIGN : ssse3_palign<"palignr">; @@ -3985,7 +3869,7 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, VEX; defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, @@ -4051,7 +3935,7 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, VEX; defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, @@ -4092,7 +3976,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, VEX; defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, @@ -4134,7 +4018,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -4156,7 +4040,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { // (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX; defm PEXTRW : SS41I_extract16<0x15, "pextrw">; @@ -4178,7 +4062,7 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; defm PEXTRD : SS41I_extract32<0x16, "pextrd">; @@ -4199,7 +4083,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize, REX_W; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; @@ -4222,7 +4106,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -4262,7 +4146,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { imm:$src3))]>, 
OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PINSRB : SS41I_insert8<0x20, "pinsrb">; @@ -4288,7 +4172,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> { imm:$src3)))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PINSRD : SS41I_insert32<0x22, "pinsrd">; @@ -4314,7 +4198,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> { imm:$src3)))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; let Constraints = "$src1 = $dst" in defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; @@ -4347,7 +4231,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { let Constraints = "$src1 = $dst" in defm INSERTPS : SS41I_insertf32<0x21, "insertps">; -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), @@ -4517,7 +4401,7 @@ multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd, } // FP round - roundss, roundps, roundsd, roundpd -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // Intrinsic form defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128, memopv4f32, memopv2f64, @@ -4552,7 +4436,7 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. 
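For reference, the Intel intrinsic that the ptest comment above refers to can be exercised from user code like this; a minimal sketch, not part of the patch (the function name is illustrative):

#include <smmintrin.h>

// _mm_testz_si128 lowers to (v)ptest followed by a setcc: it sets ZF
// when (v & mask) is all zeros and returns that flag as an int.
int all_zero_under_mask(__m128i v, __m128i mask) {
  return _mm_testz_si128(v, mask);
}
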
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Defs = [EFLAGS], Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, @@ -4595,7 +4479,7 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, OpSize, VEX; } -let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Defs = [EFLAGS], Predicates = [HasAVX] in { defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>; defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; @@ -4644,7 +4528,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, (bitconvert (memopv8i16 addr:$src))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", int_x86_sse41_phminposuw>, VEX; defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", @@ -4670,7 +4554,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, 0>, VEX_4V; @@ -4737,7 +4621,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; @@ -4769,7 +4653,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { let isCommutable = 0 in { defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, VR128, memopv16i8, i128mem, 0>, VEX_4V; @@ -4810,7 +4694,7 @@ let Constraints = "$src1 = $dst" in { } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, Intrinsic IntId> { @@ -4870,7 +4754,7 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0), (PBLENDVBrr0 VR128:$src1, VR128:$src2)>; -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, @@ -4904,7 +4788,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, 0>, VEX_4V; let Constraints = "$src1 = $dst" in @@ -4936,8 +4820,7 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in { defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; } -let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1, - Predicates = [HasAVX] in { +let Defs = [XMM0, EFLAGS], 
Predicates = [HasAVX] in { def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; @@ -4972,7 +4855,7 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX], +let Predicates = [HasAVX], Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), @@ -5007,7 +4890,7 @@ let Defs = [ECX, EFLAGS] in { } } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">, VEX; defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">, @@ -5046,7 +4929,7 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { } } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">, VEX; defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">, @@ -5165,7 +5048,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, } // Perform One Round of an AES Encryption/Decryption Flow -let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { +let Predicates = [HasAVX, HasAES] in { defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", int_x86_aesni_aesenc, 0>, VEX_4V; defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", @@ -5205,7 +5088,7 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), (AESDECLASTrm VR128:$src1, addr:$src2)>; // Perform the AES InvMixColumn Transformation -let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { +let Predicates = [HasAVX, HasAES] in { def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), "vaesimc\t{$src1, $dst|$dst, $src1}", @@ -5233,7 +5116,7 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), OpSize; // AES Round Key Generation Assist -let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { +let Predicates = [HasAVX, HasAES] in { def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -5269,7 +5152,6 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // Only the AVX version of CLMUL instructions are described here. 
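For readers unfamiliar with CLMUL, a small reference model of the carry-less multiplication that (V)PCLMULQDQ performs on the selected 64-bit halves of its operands; an illustrative sketch, not LLVM code:

#include <cstdint>

// Multiply two 64-bit polynomials over GF(2); the 127-bit product is
// returned as low and high 64-bit halves, matching one pclmulqdq result.
static void clmul64(uint64_t a, uint64_t b, uint64_t &lo, uint64_t &hi) {
  lo = hi = 0;
  for (int i = 0; i < 64; ++i)
    if ((b >> i) & 1) {
      lo ^= a << i;
      if (i != 0)
        hi ^= a >> (64 - i);
    }
}
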
// Carry-less Multiplication instructions -let isAsmParserOnly = 1 in { def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -5295,13 +5177,10 @@ defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">; defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">; defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">; -} // isAsmParserOnly - //===----------------------------------------------------------------------===// // AVX Instructions //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in { // Load from memory and broadcast to all elements of the destination operand class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC, @@ -5435,8 +5314,6 @@ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>; -} // isAsmParserOnly - def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), @@ -5622,11 +5499,15 @@ def : Pat<(X86Movddup (bc_v2f64 // Shuffle with UNPCKLPS def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), (UNPCKLPSrr VR128:$src1, VR128:$src2)>; @@ -5644,11 +5525,15 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), // Shuffle with UNPCKLPD def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (UNPCKLPDrr VR128:$src1, VR128:$src2)>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 6a24d14..f73cff3 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -34,9 +34,16 @@ let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>; + +// The long form of "int $3" turns into int3 as a size optimization. +// FIXME: This doesn't work because InstAlias can't match immediate constants. 
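The size optimization the FIXME above refers to comes down to one byte of encoding; a sketch with encodings taken from the Intel manual (the program itself is purely illustrative):

#include <cstdio>

int main() {
  // "int $3" uses the generic CD ib encoding (2 bytes); the dedicated
  // int3 opcode is CC (1 byte), which is why assemblers prefer int3.
  const unsigned char int_imm8[] = {0xCD, 0x03}; // int $3
  const unsigned char int3[]     = {0xCC};       // int3
  std::printf("int $3 = %zu bytes, int3 = %zu byte\n",
              sizeof int_imm8, sizeof int3);
  return 0;
}
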
+//def : InstAlias<"int\t$3", (INT3)>; + + def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)]>; + def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB; def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB, @@ -207,10 +214,15 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; -def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), - "str{w}\t{$dst}", []>, TB; -def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), - "str{w}\t{$dst}", []>, TB; +def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins), + "str{w}\t{$dst}", []>, TB, OpSize; +def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins), + "str{l}\t{$dst}", []>, TB; +def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins), + "str{q}\t{$dst}", []>, TB; +def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), + "str{w}\t{$dst}", []>, TB; + def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t{$src}", []>, TB; def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), @@ -393,3 +405,23 @@ let Defs = [RDX, RAX], Uses = [RCX] in let Uses = [RDX, RAX, RCX] in def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; + +//===----------------------------------------------------------------------===// +// VIA PadLock crypto instructions +let Defs = [RAX, RDI], Uses = [RDX, RDI] in + def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7; + +let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in { + def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7; + def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7; + def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7; + def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7; + def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7; +} + +let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in { + def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6; + def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6; +} +let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in + def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6; diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 6686214..83bba52 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -15,7 +15,9 @@ #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" using namespace llvm; @@ -69,7 +71,22 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { DwarfUsesInlineInfoSection = true; // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfTable; + ExceptionsType = ExceptionHandling::DwarfCFI; +} + +const MCExpr * +X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + MCContext &Context = Streamer.getContext(); + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); + const MCExpr *Four = MCConstantExpr::Create(4, Context); + return MCBinaryExpr::CreateAdd(Res, Four, Context); +} + +X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) + : X86MCAsmInfoDarwin(Triple) { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { @@ -89,7 +106,9 @@ 
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfTable; + ExceptionsType = ExceptionHandling::DwarfCFI; + + DwarfRequiresFrameSection = false; // OpenBSD has buggy support for .quad in 32-bit mode, just split into two // .words. diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index 5815225..2cd4c8e 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -25,6 +25,14 @@ namespace llvm { explicit X86MCAsmInfoDarwin(const Triple &Triple); }; + struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin { + explicit X86_64MCAsmInfoDarwin(const Triple &Triple); + virtual const MCExpr * + getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const; + }; + struct X86ELFMCAsmInfo : public MCAsmInfo { explicit X86ELFMCAsmInfo(const Triple &Triple); virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 0e3b571..f195a67 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -382,7 +382,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, const TargetInstrDesc &Desc, raw_ostream &OS) const { bool HasVEX_4V = false; - if ((TSFlags >> 32) & X86II::VEX_4V) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) HasVEX_4V = true; // VEX_R: opcode externsion equivalent to REX.R in @@ -446,10 +446,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (TSFlags & X86II::OpSize) VEX_PP = 0x01; - if ((TSFlags >> 32) & X86II::VEX_W) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) VEX_W = 1; - if ((TSFlags >> 32) & X86II::VEX_L) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) VEX_L = 1; switch (TSFlags & X86II::Op0Mask) { @@ -470,6 +470,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::XD: // F2 0F VEX_PP = 0x3; break; + case X86II::A6: // Bypass: Not used by VEX + case X86II::A7: // Bypass: Not used by VEX case X86II::TB: // Bypass: Not used by VEX case 0: break; // No prefix! @@ -512,13 +514,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, } // To only check operands before the memory address ones, start - // the search from the begining + // the search from the beginning if (IsDestMem) CurOp = 0; // If the last register should be encoded in the immediate field // do not use any bit from VEX prefix to this register, ignore it - if ((TSFlags >> 32) & X86II::VEX_I8IMM) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) NumOps--; for (; CurOp != NumOps; ++CurOp) { @@ -742,6 +744,8 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::TB: // Two-byte opcode prefix case X86II::T8: // 0F 38 case X86II::TA: // 0F 3A + case X86II::A6: // 0F A6 + case X86II::A7: // 0F A7 Need0FPrefix = true; break; case X86II::TF: // F2 0F 38 @@ -786,6 +790,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::TA: // 0F 3A EmitByte(0x3A, CurByte, OS); break; + case X86II::A6: // 0F A6 + EmitByte(0xA6, CurByte, OS); + break; + case X86II::A7: // 0F A7 + EmitByte(0xA7, CurByte, OS); + break; } } @@ -819,9 +829,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the VEX.VVVV field? 
bool HasVEX_4V = false; - if ((TSFlags >> 32) & X86II::VEX) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX) HasVEXPrefix = true; - if ((TSFlags >> 32) & X86II::VEX_4V) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) HasVEX_4V = true; @@ -837,7 +847,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); - if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode) + if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode) BaseOpcode = 0x0F; // Weird 3DNow! encoding. unsigned SrcRegNum = 0; @@ -994,7 +1004,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (CurOp != NumOps) { // The last source register of a 4 operand instruction in AVX is encoded // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. - if ((TSFlags >> 32) & X86II::VEX_I8IMM) { + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { const MCOperand &MO = MI.getOperand(CurOp++); bool IsExtReg = X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); @@ -1017,7 +1027,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, } } - if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode) + if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode) EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 2f6bd88..37fb0fe 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -308,6 +308,33 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return 0; } +const TargetRegisterClass* +X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ + const TargetRegisterClass *Super = RC; + TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + do { + switch (Super->getID()) { + case X86::GR8RegClassID: + case X86::GR16RegClassID: + case X86::GR32RegClassID: + case X86::GR64RegClassID: + case X86::FR32RegClassID: + case X86::FR64RegClassID: + case X86::RFP32RegClassID: + case X86::RFP64RegClassID: + case X86::RFP80RegClassID: + case X86::VR128RegClassID: + case X86::VR256RegClassID: + // Don't return a super-class that would shrink the spill size. + // That can happen with the vector and float classes. + if (Super->getSize() == RC->getSize()) + return Super; + } + Super = *I++; + } while (Super); + return RC; +} + const TargetRegisterClass * X86RegisterInfo::getPointerRegClass(unsigned Kind) const { switch (Kind) { @@ -337,7 +364,27 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { else return &X86::GR32RegClass; } - return NULL; + return RC; +} + +unsigned +X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; + switch (RC->getID()) { + default: + return 0; + case X86::GR32RegClassID: + return 4 - FPDiff; + case X86::GR64RegClassID: + return 12 - FPDiff; + case X86::VR128RegClassID: + return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4; + case X86::VR64RegClassID: + return 4; + } } const unsigned * @@ -450,7 +497,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { // FIXME: It's more complicated than this... 
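The "(TSFlags >> 32)" to "(TSFlags >> X86II::VEXShift)" rewrites in the X86MCCodeEmitter hunks above follow a common pattern: flag bits stored above bit 31 of a 64-bit word are tested through a named shift rather than a magic constant. A sketch under assumed, placeholder flag values (the real X86II layout differs):

#include <cstdint>

namespace X86II {
  enum { VEXShift = 32 };                             // flags above bit 31
  enum { VEX = 1, VEX_4V = 2, VEX_W = 4, VEX_L = 8 }; // placeholder values
}

static bool hasVEX_4V(uint64_t TSFlags) {
  // Equivalent to the old "(TSFlags >> 32) & X86II::VEX_4V", but the
  // intent is now carried by the named constant.
  return (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
}
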
if (0 && requiresRealignment && MFI->hasVarSizedObjects()) report_fatal_error( - "Stack realignment in presense of dynamic allocas is not supported"); + "Stack realignment in presence of dynamic allocas is not supported"); // If we've requested that we force align the stack do so now. if (ForceStackAlign) diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 064be64..9970c52 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -91,6 +91,9 @@ public: getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; @@ -101,6 +104,9 @@ public: const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// getCalleeSavedRegs - Return a null-terminated list of all of the /// callee-save registers on this target. const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 612fac2..fd7a247 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -46,7 +46,8 @@ let Namespace = "X86" in { def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>; def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>; - // X86-64 only + // X86-64 only, requires REX. + let CostPerUse = 1 in { def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>; def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>; def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>; @@ -59,6 +60,7 @@ let Namespace = "X86" in { def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>; def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>; def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>; + } // High registers. On x86-64, these cannot be used in any instruction // with a REX prefix. @@ -82,8 +84,8 @@ let Namespace = "X86" in { } def IP : Register<"ip">, DwarfRegNum<[16]>; - // X86-64 only - let SubRegIndices = [sub_8bit] in { + // X86-64 only, requires REX. 
+ let SubRegIndices = [sub_8bit], CostPerUse = 1 in { def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>; def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>; def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>; @@ -105,7 +107,8 @@ let Namespace = "X86" in { def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>; def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>; - // X86-64 only + // X86-64 only, requires REX + let CostPerUse = 1 in { def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>; def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>; def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>; @@ -114,7 +117,7 @@ let Namespace = "X86" in { def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>; def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>; def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>; - } + }} // 64-bit registers, X86-64 only let SubRegIndices = [sub_32bit] in { @@ -127,6 +130,8 @@ let Namespace = "X86" in { def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>; def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>; + // These also require REX. + let CostPerUse = 1 in { def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>; def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>; def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>; @@ -136,7 +141,7 @@ let Namespace = "X86" in { def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>; def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>; def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>; - } + }} // MMX Registers. These are actually aliased to ST0 .. ST7 def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>; @@ -170,6 +175,7 @@ let Namespace = "X86" in { def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>; // X86-64 only + let CostPerUse = 1 in { def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>; def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>; def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>; @@ -178,7 +184,7 @@ let Namespace = "X86" in { def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; - } + }} // YMM Registers, used by AVX instructions let SubRegIndices = [sub_xmm] in { diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 42e8193..02754f9 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -178,7 +178,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - // This requires the copy size to be a constant, preferrably + // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 1ee7312..ba5864e 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -144,7 +144,8 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { /// passed as the second argument. 
Otherwise it returns null. const char *X86Subtarget::getBZeroEntry() const { // Darwin 10 has a __bzero entry point for this purpose. - if (getDarwinVers() >= 10) + if (getTargetTriple().isMacOSX() && + !getTargetTriple().isMacOSXVersionLT(10, 6)) return "__bzero"; return 0; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 0a62a02..286a798 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -165,9 +165,15 @@ public: bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } - bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } - bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; } - bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } + const Triple &getTargetTriple() const { return TargetTriple; } + + bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } + bool isTargetFreeBSD() const { + return TargetTriple.getOS() == Triple::FreeBSD; + } + bool isTargetSolaris() const { + return TargetTriple.getOS() == Triple::Solaris; + } // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". @@ -215,13 +221,6 @@ public: return PICStyle == PICStyles::StubDynamicNoPIC || PICStyle == PICStyles::StubPIC; } - /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, - /// 10 = Snow Leopard, etc. - unsigned getDarwinVers() const { - if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber(); - return 0; - } - /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. 
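On why the getBZeroEntry rewrite above is behavior-preserving: Darwin kernel major version 10 corresponds to Mac OS X 10.6, so the old "getDarwinVers() >= 10" test maps to an explicit 10.6 check. A sketch against the same Triple calls the patch itself uses:

#include "llvm/ADT/Triple.h"

static bool targetHasBZero(const llvm::Triple &T) {
  // __bzero is available on Mac OS X 10.6 (Darwin 10) and later.
  return T.isMacOSX() && !T.isMacOSXVersionLT(10, 6);
}
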
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 889c824..7483329 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -26,19 +26,18 @@ using namespace llvm; static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: - return new X86MCAsmInfoDarwin(TheTriple); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - if (TheTriple.getEnvironment() == Triple::MachO) - return new X86MCAsmInfoDarwin(TheTriple); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { + if (TheTriple.getArch() == Triple::x86_64) + return new X86_64MCAsmInfoDarwin(TheTriple); else - return new X86MCAsmInfoCOFF(TheTriple); - default: - return new X86ELFMCAsmInfo(TheTriple); + return new X86MCAsmInfoDarwin(TheTriple); } + + if (TheTriple.isOSWindows()) + return new X86MCAsmInfoCOFF(TheTriple); + + return new X86ELFMCAsmInfo(TheTriple); } static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, @@ -48,19 +47,14 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, bool RelaxAll, bool NoExecStack) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - if (TheTriple.getEnvironment() == Triple::MachO) - return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); - else - return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); - default: - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); - } + + if (TheTriple.isOSWindows()) + return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); + + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); } extern "C" void LLVMInitializeX86Target() { @@ -96,11 +90,11 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, const std::string &FS) : X86TargetMachine(T, TT, FS, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? - "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" : (getSubtargetImpl()->isTargetCygMing() || getSubtargetImpl()->isTargetWindows()) ? 
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" : - "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"), + "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), @@ -111,7 +105,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, const std::string &FS) : X86TargetMachine(T, TT, FS, true), - DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"), + DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index c15dfbb..1231798 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -38,6 +38,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +MCSymbol *X8664_MachoTargetObjectFile:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + return Mang->getSymbol(GV); +} + unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { if (TM.getRelocationModel() == Reloc::PIC_) return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; @@ -52,7 +58,7 @@ unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { return DW_EH_PE_absptr; } -unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { +unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const { if (TM.getRelocationModel() == Reloc::PIC_) return DW_EH_PE_pcrel | DW_EH_PE_sdata4; else @@ -91,17 +97,14 @@ unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { return DW_EH_PE_absptr; } -unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const { - CodeModel::Model Model = TM.getCodeModel(); - if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | (Model == CodeModel::Small || - Model == CodeModel::Medium ? - DW_EH_PE_sdata4 : DW_EH_PE_sdata8); +unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const { + if (CFI) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - if (Model == CodeModel::Small || Model == CodeModel::Medium) - return DW_EH_PE_udata4; + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - return DW_EH_PE_absptr; + return DW_EH_PE_udata4; } unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index f2fd49c..e21b5bf 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -25,6 +25,12 @@ namespace llvm { getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const; + + // getCFIPersonalitySymbol - The symbol that gets passed to + // .cfi_personality. 
+ virtual MCSymbol * + getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const; }; class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { @@ -34,7 +40,7 @@ namespace llvm { :TM(tm) { } virtual unsigned getPersonalityEncoding() const; virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding() const; + virtual unsigned getFDEEncoding(bool CFI) const; virtual unsigned getTTypeEncoding() const; }; @@ -45,7 +51,7 @@ namespace llvm { :TM(tm) { } virtual unsigned getPersonalityEncoding() const; virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding() const; + virtual unsigned getFDEEncoding(bool CFI) const; virtual unsigned getTTypeEncoding() const; }; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index fc8a07a..6bec9f9 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -30,8 +30,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <queue> -#include <set> using namespace llvm; /// XCoreDAGToDAGISel - XCore specific code to select XCore machine @@ -49,7 +47,8 @@ namespace { Subtarget(*TM.getSubtargetImpl()) { } SDNode *Select(SDNode *N); - + SDNode *SelectBRIND(SDNode *N); + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -154,62 +153,133 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base, SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - EVT NVT = N->getValueType(0); - if (NVT == MVT::i32) { - switch (N->getOpcode()) { - default: break; - case ISD::Constant: { - uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue(); - if (immMskBitp(N)) { - // Transformation function: get the size of a mask - // Look for the first non-zero bit - SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val)); - return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, - MVT::i32, MskSize); - } - else if (!isUInt<16>(Val)) { - SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get( - Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI.getPointerTy()); - return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, - MVT::Other, CPIdx, - CurDAG->getEntryNode()); - } - break; - } - case XCoreISD::LADD: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2) }; - return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - case XCoreISD::LSUB: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2) }; - return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - case XCoreISD::MACCU: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3) }; - return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32, - Ops, 4); - } - case XCoreISD::MACCS: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3) }; - return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32, - Ops, 4); - } - case XCoreISD::LMUL: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3) }; - return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, - Ops, 4); - } - // Other cases are autogenerated. 
+ switch (N->getOpcode()) { + default: break; + case ISD::Constant: { + uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue(); + if (immMskBitp(N)) { + // Transformation function: get the size of a mask + // Look for the first non-zero bit + SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val)); + return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, + MVT::i32, MskSize); + } + else if (!isUInt<16>(Val)) { + SDValue CPIdx = + CurDAG->getTargetConstantPool(ConstantInt::get( + Type::getInt32Ty(*CurDAG->getContext()), Val), + TLI.getPointerTy()); + return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, + MVT::Other, CPIdx, + CurDAG->getEntryNode()); } + break; + } + case XCoreISD::LADD: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }; + return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, + Ops, 3); + } + case XCoreISD::LSUB: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }; + return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, + Ops, 3); + } + case XCoreISD::MACCU: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3) }; + return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32, + Ops, 4); + } + case XCoreISD::MACCS: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3) }; + return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32, + Ops, 4); + } + case XCoreISD::LMUL: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3) }; + return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, + Ops, 4); + } + case ISD::BRIND: + if (SDNode *ResNode = SelectBRIND(N)) + return ResNode; + break; + // Other cases are autogenerated. } return SelectCode(N); } + +/// Given a chain return a new chain where any appearance of Old is replaced +/// by New. There must be at most one instruction between Old and Chain and +/// this instruction must be a TokenFactor. Returns an empty SDValue if +/// these conditions don't hold. +static SDValue +replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New) +{ + if (Chain == Old) + return New; + if (Chain->getOpcode() != ISD::TokenFactor) + return SDValue(); + SmallVector<SDValue, 8> Ops; + bool found = false; + for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) { + if (Chain->getOperand(i) == Old) { + Ops.push_back(New); + found = true; + } else { + Ops.push_back(Chain->getOperand(i)); + } + } + if (!found) + return SDValue(); + return CurDAG->getNode(ISD::TokenFactor, Chain->getDebugLoc(), MVT::Other, + &Ops[0], Ops.size()); +} + +SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + // (brind (int_xcore_checkevent (addr))) + SDValue Chain = N->getOperand(0); + SDValue Addr = N->getOperand(1); + if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return 0; + unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue(); + if (IntNo != Intrinsic::xcore_checkevent) + return 0; + SDValue nextAddr = Addr->getOperand(2); + SDValue CheckEventChainOut(Addr.getNode(), 1); + if (!CheckEventChainOut.use_empty()) { + // If the chain out of the checkevent intrinsic is an operand of the + // indirect branch or used in a TokenFactor which is the operand of the + // indirect branch then build a new chain which uses the chain coming into + // the checkevent intrinsic instead. 
+ SDValue CheckEventChainIn = Addr->getOperand(0); + SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut, + CheckEventChainIn); + if (!NewChain.getNode()) + return 0; + Chain = NewChain; + } + // Enable events on the thread using setsr 1 and then disable them immediately + // after with clrsr 1. If any resources owned by the thread are ready an event + // will be taken. If no resource is ready we branch to the address which was + // the operand to the checkevent intrinsic. + SDValue constOne = getI32Imm(1); + SDValue Glue = + SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue, + constOne, Chain), 0); + Glue = + SDValue(CurDAG->getMachineNode(XCore::CLRSR_branch_u6, dl, MVT::Glue, + constOne, Glue), 0); + if (nextAddr->getOpcode() == XCoreISD::PCRelativeWrapper && + nextAddr->getOperand(0)->getOpcode() == ISD::TargetBlockAddress) { + return CurDAG->SelectNodeTo(N, XCore::BRFU_lu6, MVT::Other, + nextAddr->getOperand(0), Glue); + } + return CurDAG->SelectNodeTo(N, XCore::BAU_1r, MVT::Other, nextAddr, Glue); +} diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 4817787..5987e8b 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -37,8 +37,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/VectorExtras.h" -#include <queue> -#include <set> using namespace llvm; const char *XCoreTargetLowering:: @@ -967,7 +965,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index ecdd4cb..789546e 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -308,6 +308,16 @@ multiclass FU6_LU6<string OpcStr, SDNode OpNode> { !strconcat(OpcStr, " $b"), [(OpNode immU16:$b)]>; } +multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> { + def _u6: _FU6< + (outs), (ins i32imm:$b), + !strconcat(OpcStr, " $b"), + [(Int immU6:$b)]>; + def _lu6: _FLU6< + (outs), (ins i32imm:$b), + !strconcat(OpcStr, " $b"), + [(Int immU16:$b)]>; +} multiclass FU6_LU6_np<string OpcStr> { def _u6: _FU6< @@ -638,8 +648,8 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; } } -// TODO extdp, kentsp, krestsp, blat, setsr -// clrsr, getsr, kalli +// TODO extdp, kentsp, krestsp, blat +// getsr, kalli let isBranch = 1, isTerminator = 1, isBarrier = 1 in { def BRBU_u6 : _FU6< (outs), @@ -678,6 +688,17 @@ def LDAWCP_lu6: _FLRU6< "ldaw r11, cp[$a]", [(set R11, ADDRcpii:$a)]>; +defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>; + +defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>; + +// setsr may cause a branch if it is used to enable events. clrsr may +// branch if it is executed while events are enabled. 
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in { +defm SETSR_branch : FU6_LU6_np<"setsr">; +defm CLRSR_branch : FU6_LU6_np<"clrsr">; +} + // U10 // TODO ldwcpl, blacp @@ -718,7 +739,7 @@ def BL_lu10 : _FLU10< } // Two operand short -// TODO getr, getst +// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg) def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>; @@ -727,8 +748,6 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>; -// TODO setd, eet, eef, testwct, tinitpc, tinitdp, -// tinitsp, tinitcp, tsetmr, sext (reg), zext (reg) let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), @@ -816,9 +835,29 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "setd res[$r], $val", [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; +def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "getst $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>; + +def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:sp, $src", + [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>; + +def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:pc, $src", + [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>; + +def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:cp, $src", + [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>; + +def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:dp, $src", + [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>; + // Two operand long -// TODO setclk, setrdy, setpsc, endin, peek, -// getd, testlcl, tinitlr, getps, setps +// TODO endin, peek, +// getd, testlcl def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>; @@ -839,10 +878,41 @@ def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), "settw res[$r], $val", [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>; +def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "get $dst, ps[$src]", + [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>; + +def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), + "set ps[$src1], $src2", + [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>; + +def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:lr, $src", + [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>; + +def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), + "setclk res[$src1], $src2", + [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>; + +def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), + "setrdy res[$src1], $src2", + [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>; + +def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), + "setpsc res[$src1], $src2", + [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; + // One operand short -// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp +// TODO edu, eeu, waitet, waitef, tstart, clrtp // setdp, setcp, setev, kcall // dgetreg +def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i), + "msync res[$i]", + [(int_xcore_msync GRRegs:$i)]>; +def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i), + "mjoin res[$i]", + [(int_xcore_mjoin GRRegs:$i)]>; + let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), "bau $addr", @@ -899,7 +969,7 @@ def EEU_1r : _F1R<(outs), (ins GRRegs:$r), [(int_xcore_eeu GRRegs:$r)]>; // Zero 
operand short -// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, +// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp @@ -910,6 +980,10 @@ def GETID_0R : _F0R<(outs), (ins), "get r11, id", [(set R11, (int_xcore_getid))]>; +def SSYNC_0r : _F0R<(outs), (ins), + "ssync", + [(int_xcore_ssync)]>; + let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, hasSideEffects = 1 in def WAITEU_0R : _F0R<(outs), (ins), diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 56c0879..0287a51 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -104,6 +104,11 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { return TFI->hasFP(MF); } +bool +XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { + return false; +} + // This function eliminates ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void XCoreRegisterInfo:: diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 2185755..770483b 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -48,6 +48,8 @@ public: bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool useFPForScavengingIndex(const MachineFunction &MF) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 0c650cf..54a7f67 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -771,8 +771,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index efdeec5..179b150 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -20,5 +20,4 @@ add_llvm_library(LLVMipo PruneEH.cpp StripDeadPrototypes.cpp StripSymbols.cpp - StructRetPromotion.cpp ) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index b423221..d4eaf0c 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -49,7 +49,7 @@ namespace { /// Struct that represents (part of) either a return value or a function /// argument. Used so that arguments and return values can be used - /// interchangably. + /// interchangeably. struct RetOrArg { RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), IsArg(IsArg) {} @@ -273,8 +273,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // function empty. NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. 
While we're at + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. While we're at // it, remove the dead arguments from the DeadArguments list. // for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), @@ -294,7 +294,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// instead. bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) { - if (Fn.isDeclaration()) + if (Fn.isDeclaration() || Fn.mayBeOverridden()) return false; // Functions with local linkage should already have been handled. @@ -379,7 +379,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, // The value is returned from a function. It's only live when the // function's return value is live. We use RetValNum here, for the case // that U is really a use of an insertvalue instruction that uses the - // orginal Use. + // original Use. RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum); // We might be live, depending on the liveness of Use. return MarkIfNotLive(Use, MaybeLiveUses); @@ -894,8 +894,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. i = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++i) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index d4cb712..ded58ac 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -21,6 +21,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -54,6 +55,7 @@ STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); STATISTIC(NumNestRemoved , "Number of nest attributes removed"); STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); +STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); namespace { struct GlobalStatus; @@ -77,6 +79,7 @@ namespace { bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, const SmallPtrSet<const PHINode*, 16> &PHIUsers, const GlobalStatus &GS); + bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); }; } @@ -1191,9 +1194,11 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, const StructType *ST = cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); - Result = + PHINode *NewPN = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); + Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); } else { llvm_unreachable("Unknown usable value"); @@ -1940,36 +1945,24 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { return Changed; } -/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all +/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all /// initializers have an init priority of 65535. 
GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); if (GV == 0) return 0; - // Found it, verify it's an array of { int, void()* }. - const ArrayType *ATy =dyn_cast<ArrayType>(GV->getType()->getElementType()); - if (!ATy) return 0; - const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); - if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isIntegerTy(32)) return 0; - const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); - if (!PFTy) return 0; - const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); - if (!FTy || !FTy->getReturnType()->isVoidTy() || - FTy->isVarArg() || FTy->getNumParams() != 0) - return 0; - // Verify that the initializer is simple enough for us to handle. We are // only allowed to optimize the initializer if it is unique. if (!GV->hasUniqueInitializer()) return 0; - - ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); - if (!CA) return 0; - + + if (isa<ConstantAggregateZero>(GV->getInitializer())) + return GV; + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - ConstantStruct *CS = dyn_cast<ConstantStruct>(*i); - if (CS == 0) return 0; - + if (isa<ConstantAggregateZero>(*i)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(*i); if (isa<ConstantPointerNull>(CS->getOperand(1))) continue; @@ -1978,8 +1971,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { return 0; // Init priority must be standard. - ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); - if (!CI || CI->getZExtValue() != 65535) + ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) return 0; } @@ -1989,6 +1982,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, /// return a list of the functions and null terminator as a vector. static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector<Function*>(); ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); std::vector<Function*> Result; Result.reserve(CA->getNumOperands()); @@ -2019,7 +2014,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 2147483647); + 0x7fffffff); } CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); } @@ -2696,12 +2691,126 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { return Changed; } +static Function *FindCXAAtExit(Module &M) { + Function *Fn = M.getFunction("__cxa_atexit"); + + if (!Fn) + return 0; + + const FunctionType *FTy = Fn->getFunctionType(); + + // Checking that the function has the right return type, the right number of + // parameters and that they all have pointer types should be enough. + if (!FTy->getReturnType()->isIntegerTy() || + FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return 0; + + return Fn; +} + +/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ +/// destructor and can therefore be eliminated. 
+/// Note that we assume that other optimization passes have already simplified +/// the code so we only look for a function with a single basic block, where +/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor. +static bool cxxDtorIsEmpty(const Function &Fn, + SmallPtrSet<const Function *, 8> &CalledFunctions) { + // FIXME: We could eliminate C++ destructors if they're readonly/readnone and + // nounwind, but that doesn't seem worth doing. + if (Fn.isDeclaration()) + return false; + + if (++Fn.begin() != Fn.end()) + return false; + + const BasicBlock &EntryBlock = Fn.getEntryBlock(); + for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); + I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore debug intrinsics. + if (isa<DbgInfoIntrinsic>(CI)) + continue; + + const Function *CalledFn = CI->getCalledFunction(); + + if (!CalledFn) + return false; + + SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions); + + // Don't treat recursive functions as empty. + if (!NewCalledFunctions.insert(CalledFn)) + return false; + + if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) + return false; + } else if (isa<ReturnInst>(*I)) + return true; + else + return false; + } + + return false; +} + +bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { + /// Itanium C++ ABI p3.3.5: + /// + /// After constructing a global (or local static) object, that will require + /// destruction on exit, a termination function is registered as follows: + /// + /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); + /// + /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the + /// call f(p) when DSO d is unloaded, before all such termination calls + /// registered before this one. It returns zero if registration is + /// successful, nonzero on failure. + + // This pass will look for calls to __cxa_atexit where the function is trivial + // and remove them. + bool Changed = false; + + for (Function::use_iterator I = CXAAtExitFn->use_begin(), + E = CXAAtExitFn->use_end(); I != E;) { + // We're only interested in calls. Theoretically, we could handle invoke + // instructions as well, but neither llvm-gcc nor clang generate invokes + // to __cxa_atexit. + CallInst *CI = dyn_cast<CallInst>(*I++); + if (!CI) + continue; + + Function *DtorFn = + dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); + if (!DtorFn) + continue; + + SmallPtrSet<const Function *, 8> CalledFunctions; + if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) + continue; + + // Just remove the call. + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + CI->eraseFromParent(); + + ++NumCXXDtorsRemoved; + + Changed |= true; + } + + return Changed; +} + bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); + Function *CXAAtExitFn = FindCXAAtExit(M); + bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -2718,6 +2827,11 @@ bool GlobalOpt::runOnModule(Module &M) { // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M); + + // Try to remove trivial global destructors. 
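For readers tracing how these registrations arise: on Itanium-ABI platforms (e.g. Linux/ELF), a dynamically-initialized global with a nontrivial destructor is torn down via a __cxa_atexit registration that the compiler emits. A minimal sketch of the pattern the new routine recognizes and erases ('Logger' is an invented name; this is an illustration, not LLVM code):

// The declaration below is exactly the shape FindCXAAtExit checks for:
// an integer return and three pointer parameters.
extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);

struct Logger {
  ~Logger() {}    // empty body: provably no observable effect
};

// For this global the compiler emits, roughly,
//   __cxa_atexit(<~Logger thunk>, &g_log, &__dso_handle);
// cxxDtorIsEmpty proves the thunk does nothing, so GlobalOpt can delete
// the registration call outright.
Logger g_log;

int main() { return 0; }

The guarded call just below is where runOnModule wires this in on each iteration.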
+ if (CXAAtExitFn) + LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); + Changed |= LocalChange; } diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index c7c2939..25c0134 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -186,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { // Find the returned value Value *V; if (!STy) - V = RI->getOperand(i); + V = RI->getOperand(0); else V = FindInsertedValue(RI->getOperand(0), i); diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index fbe90ce..21dcb51 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -45,7 +45,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripDebugDeclarePass(Registry); initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); - initializeSRETPromotionPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 37eafd7..57f3e77 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -29,7 +29,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 9b9ebad..7cb1d18 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -126,6 +126,8 @@ bool InternalizePass::runOnModule(Module &M) { // FIXME: maybe use private linkage? for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && // Function must be defined here + // Available externally is really just a "declaration with a body". + !I->hasAvailableExternallyLinkage() && !I->hasLocalLinkage() && // Can't already have internal linkage !ExternalNames.count(I->getName())) {// Not marked to keep external? I->setLinkage(GlobalValue::InternalLinkage); @@ -144,9 +146,6 @@ bool InternalizePass::runOnModule(Module &M) { // Never internalize anchors used by the machine module info, else the info // won't find them. (see MachineModuleInfo.) - ExternalNames.insert("llvm.dbg.compile_units"); - ExternalNames.insert("llvm.dbg.global_variables"); - ExternalNames.insert("llvm.dbg.subprograms"); ExternalNames.insert("llvm.global_ctors"); ExternalNames.insert("llvm.global_dtors"); ExternalNames.insert("llvm.noinline"); diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index b545f0b..52ecf17 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -430,7 +430,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) // This PHI node will be in the new block created from the // splitBasicBlock call. - PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), + PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2, "SetJmpReturn", Inst); // Coming from a call to setjmp, the return is 0. 
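Two of the hunks above tighten linkage checks in the same spirit: DeadArgumentElimination's caller-side cleanup now bails on functions that mayBeOverridden(), and Internalize now leaves available_externally bodies alone. A rough source-level illustration of both situations (invented names; GCC/Clang attribute syntax assumed):

// 1) May be overridden: a weak definition can be replaced by another object
// file at link time, so rewriting call sites here (say, dropping the unused
// 'detail' argument) could miscompile against an override that reads it.
__attribute__((weak)) int hook(int detail) { (void)detail; return 0; }

// 2) Available externally: clang uses this linkage for bodies that exist
// only to aid optimization (e.g. a C99 inline function); the authoritative
// definition lives in another object, so forcing internal linkage here would
// turn a "declaration with a body" into a competing local definition.
inline int twice(int x) { return 2 * x; }

int main() { return hook(twice(21)); }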
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index cccffca..f741443 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -55,6 +55,7 @@ #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" @@ -125,7 +126,7 @@ private: const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); const ComparableFunction ComparableFunction::TombstoneKey = ComparableFunction(1); -TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1); +TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1); } @@ -212,7 +213,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, return false; } - switch(Ty1->getTypeID()) { + switch (Ty1->getTypeID()) { default: llvm_unreachable("Unknown type!"); // Fall through in Release mode. diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 2afd029..d9d1d10 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -95,7 +95,7 @@ Function* PartialInliner::unswitchFunction(Function* F) { PHINode* OldPhi = dyn_cast<PHINode>(I); if (!OldPhi) break; - PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins); + PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI(); diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index d91c2c4..9470180 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -27,7 +27,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" -#include <set> #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp deleted file mode 100644 index 584deac..0000000 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ /dev/null @@ -1,357 +0,0 @@ -//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass finds functions that return a struct (using a pointer to the struct -// as the first argument of the function, marked with the 'sret' attribute) and -// replaces them with a new function that simply returns each of the elements of -// that struct (using multiple return values). -// -// This pass works under a number of conditions: -// 1. The returned struct must not contain other structs -// 2. The returned struct must only be used to load values from -// 3. 
The placeholder struct passed in is the result of an alloca -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "sretpromotion" -#include "llvm/Transforms/IPO.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/CallGraphSCCPass.h" -#include "llvm/Instructions.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses"); -STATISTIC(NumSRET , "Number of sret promoted"); -namespace { - /// SRETPromotion - This pass removes sret parameter and updates - /// function to use multiple return value. - /// - struct SRETPromotion : public CallGraphSCCPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - CallGraphSCCPass::getAnalysisUsage(AU); - } - - virtual bool runOnSCC(CallGraphSCC &SCC); - static char ID; // Pass identification, replacement for typeid - SRETPromotion() : CallGraphSCCPass(ID) { - initializeSRETPromotionPass(*PassRegistry::getPassRegistry()); - } - - private: - CallGraphNode *PromoteReturn(CallGraphNode *CGN); - bool isSafeToUpdateAllCallers(Function *F); - Function *cloneFunctionBody(Function *F, const StructType *STy); - CallGraphNode *updateCallSites(Function *F, Function *NF); - }; -} - -char SRETPromotion::ID = 0; -INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion", - "Promote sret arguments to multiple ret values", false, false) -INITIALIZE_AG_DEPENDENCY(CallGraph) -INITIALIZE_PASS_END(SRETPromotion, "sretpromotion", - "Promote sret arguments to multiple ret values", false, false) - -Pass *llvm::createStructRetPromotionPass() { - return new SRETPromotion(); -} - -bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) { - bool Changed = false; - - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - if (CallGraphNode *NewNode = PromoteReturn(*I)) { - SCC.ReplaceNode(*I, NewNode); - Changed = true; - } - - return Changed; -} - -/// PromoteReturn - This method promotes function that uses StructRet paramater -/// into a function that uses multiple return values. -CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { - Function *F = CGN->getFunction(); - - if (!F || F->isDeclaration() || !F->hasLocalLinkage()) - return 0; - - // Make sure that function returns struct. - if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) - return 0; - - DEBUG(dbgs() << "SretPromotion: Looking at sret function " - << F->getName() << "\n"); - - assert(F->getReturnType()->isVoidTy() && "Invalid function return type"); - Function::arg_iterator AI = F->arg_begin(); - const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType()); - assert(FArgType && "Invalid sret parameter type"); - const llvm::StructType *STy = - dyn_cast<StructType>(FArgType->getElementType()); - assert(STy && "Invalid sret parameter element type"); - - // Check if it is ok to perform this promotion. 
- if (isSafeToUpdateAllCallers(F) == false) { - DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); - ++NumRejectedSRETUses; - return 0; - } - - DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); - ++NumSRET; - // [1] Replace use of sret parameter - AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", - F->getEntryBlock().begin()); - Value *NFirstArg = F->arg_begin(); - NFirstArg->replaceAllUsesWith(TheAlloca); - - // [2] Find and replace ret instructions - for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { - Instruction *I = BI; - ++BI; - if (isa<ReturnInst>(I)) { - Value *NV = new LoadInst(TheAlloca, "mrv.ld", I); - ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I); - I->replaceAllUsesWith(NR); - I->eraseFromParent(); - } - } - - // [3] Create the new function body and insert it into the module. - Function *NF = cloneFunctionBody(F, STy); - - // [4] Update all call sites to use new function - CallGraphNode *NF_CFN = updateCallSites(F, NF); - - CallGraph &CG = getAnalysis<CallGraph>(); - NF_CFN->stealCalledFunctionsFrom(CG[F]); - - delete CG.removeFunctionFromModule(F); - return NF_CFN; -} - -// Check if it is ok to perform this promotion. -bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { - - if (F->use_empty()) - // No users. OK to modify signature. - return true; - - for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end(); - FnUseI != FnUseE; ++FnUseI) { - // The function is passed in as an argument to (possibly) another function, - // we can't change it! - CallSite CS(*FnUseI); - Instruction *Call = CS.getInstruction(); - // The function is used by something else than a call or invoke instruction, - // we can't change it! - if (!Call || !CS.isCallee(FnUseI)) - return false; - CallSite::arg_iterator AI = CS.arg_begin(); - Value *FirstArg = *AI; - - if (!isa<AllocaInst>(FirstArg)) - return false; - - // Check FirstArg's users. - for (Value::use_iterator ArgI = FirstArg->use_begin(), - ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) { - User *U = *ArgI; - // If FirstArg user is a CallInst that does not correspond to current - // call site then this function F is not suitable for sret promotion. - if (CallInst *CI = dyn_cast<CallInst>(U)) { - if (CI != Call) - return false; - } - // If FirstArg user is a GEP whose all users are not LoadInst then - // this function F is not suitable for sret promotion. - else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { - // TODO : Use dom info and insert PHINodes to collect get results - // from multiple call sites for this GEP. - if (GEP->getParent() != Call->getParent()) - return false; - for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end(); - GEPI != GEPE; ++GEPI) - if (!isa<LoadInst>(*GEPI)) - return false; - } - // Any other FirstArg users make this function unsuitable for sret - // promotion. - else - return false; - } - } - - return true; -} - -/// cloneFunctionBody - Create a new function based on F and -/// insert it into module. Remove first argument. Use STy as -/// the return type for new function. -Function *SRETPromotion::cloneFunctionBody(Function *F, - const StructType *STy) { - - const FunctionType *FTy = F->getFunctionType(); - std::vector<const Type*> Params; - - // Attributes - Keep track of the parameter attributes for the arguments. 
- SmallVector<AttributeWithIndex, 8> AttributesVec; - const AttrListPtr &PAL = F->getAttributes(); - - // Add any return attributes. - if (Attributes attrs = PAL.getRetAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); - - // Skip first argument. - Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - ++I; - // 0th parameter attribute is reserved for return type. - // 1th parameter attribute is for first 1st sret argument. - unsigned ParamIndex = 2; - while (I != E) { - Params.push_back(I->getType()); - if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) - AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); - ++I; - ++ParamIndex; - } - - // Add any fn attributes. - if (Attributes attrs = PAL.getFnAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); - - - FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg()); - Function *NF = Function::Create(NFTy, F->getLinkage()); - NF->takeName(F); - NF->copyAttributesFrom(F); - NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); - F->getParent()->getFunctionList().insert(F, NF); - NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - - // Replace arguments - I = F->arg_begin(); - E = F->arg_end(); - Function::arg_iterator NI = NF->arg_begin(); - ++I; - while (I != E) { - I->replaceAllUsesWith(NI); - NI->takeName(I); - ++I; - ++NI; - } - - return NF; -} - -/// updateCallSites - Update all sites that call F to use NF. -CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { - CallGraph &CG = getAnalysis<CallGraph>(); - SmallVector<Value*, 16> Args; - - // Attributes - Keep track of the parameter attributes for the arguments. - SmallVector<AttributeWithIndex, 8> ArgAttrsVec; - - // Get a new callgraph node for NF. - CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); - - while (!F->use_empty()) { - CallSite CS(*F->use_begin()); - Instruction *Call = CS.getInstruction(); - - const AttrListPtr &PAL = F->getAttributes(); - // Add any return attributes. - if (Attributes attrs = PAL.getRetAttributes()) - ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs)); - - // Copy arguments, however skip first one. - CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - Value *FirstCArg = *AI; - ++AI; - // 0th parameter attribute is reserved for return type. - // 1th parameter attribute is for first 1st sret argument. - unsigned ParamIndex = 2; - while (AI != AE) { - Args.push_back(*AI); - if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) - ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); - ++ParamIndex; - ++AI; - } - - // Add any function attributes. - if (Attributes attrs = PAL.getFnAttributes()) - ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs)); - - AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end()); - - // Build new call instruction. 
- Instruction *New; - if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); - cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); - cast<InvokeInst>(New)->setAttributes(NewPAL); - } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); - cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); - cast<CallInst>(New)->setAttributes(NewPAL); - if (cast<CallInst>(Call)->isTailCall()) - cast<CallInst>(New)->setTailCall(); - } - Args.clear(); - ArgAttrsVec.clear(); - New->takeName(Call); - - // Update the callgraph to know that the callsite has been transformed. - CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; - CalleeNode->removeCallEdgeFor(Call); - CalleeNode->addCalledFunction(New, NF_CGN); - - // Update all users of sret parameter to extract value using extractvalue. - for (Value::use_iterator UI = FirstCArg->use_begin(), - UE = FirstCArg->use_end(); UI != UE; ) { - User *U2 = *UI++; - CallInst *C2 = dyn_cast<CallInst>(U2); - if (C2 && (C2 == Call)) - continue; - - GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2); - ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2)); - Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), - "evi", UGEP); - while(!UGEP->use_empty()) { - // isSafeToUpdateAllCallers has checked that all GEP uses are - // LoadInsts - LoadInst *L = cast<LoadInst>(*UGEP->use_begin()); - L->replaceAllUsesWith(GR); - L->eraseFromParent(); - } - UGEP->eraseFromParent(); - continue; - } - Call->eraseFromParent(); - } - - return NF_CGN; -} - diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 9c2969c..9c70cf8 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,6 +11,7 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/IRBuilder.h" @@ -69,7 +70,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; - bool MustPreserveLCSSA; bool MadeIRChange; public: /// Worklist - All of the instructions that need to be simplified. @@ -217,8 +217,8 @@ private: Instruction *transformCallThroughTrampoline(CallSite CS); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); + Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); - DbgDeclareInst *hasOneUsePlusDeclare(Value *V); Value *EmitGEPOffset(User *GEP); public: @@ -247,7 +247,10 @@ public: // segment of unreachable code, so just clobber the instruction. 
if (&I == V) V = UndefValue::get(I.getType()); - + + DEBUG(errs() << "IC: Replacing " << I << "\n" + " with " << *V << '\n'); + I.replaceAllUsesWith(V); return &I; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 7986d1a..a08446e 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -330,7 +331,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient +/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. @@ -755,6 +756,54 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } + + // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); + } + + // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + } + + // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 + // where CMAX is the all ones value for the truncated type, + // iff the lower bits of C2 and CA are zero. + if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) && + LHS->hasOneUse() && RHS->hasOneUse()) { + Value *V; + ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + + // (trunc x) == C1 & (and x, CA) == C2 + if (match(Val2, m_Trunc(m_Value(V))) && + match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = RHSCst; + BigCst = LHSCst; + } + // (and x, CA) == C2 & (trunc x) == C1 + else if (match(Val, m_Trunc(m_Value(V))) && + match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = LHSCst; + BigCst = RHSCst; + } + + if (SmallCst && BigCst) { + unsigned BigBitSize = BigCst->getType()->getBitWidth(); + unsigned SmallBitSize = SmallCst->getType()->getBitWidth(); + + // Check that the low bits are zero. + APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); + if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) { + Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue()); + APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue(); + Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N); + return Builder->CreateICmp(LHSCC, NewAnd, NewVal); + } + } } // From here on, we only handle: @@ -767,7 +816,17 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) return 0; - + + // Make a constant range that's the intersection of the two icmp ranges. 
+ // If the intersection is empty, we know that the result is false. + ConstantRange LHSRange = + ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); + ConstantRange RHSRange = + ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); + + if (LHSRange.intersectWith(RHSRange).isEmptySet()) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + // We can't fold (ugt x, C) & (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) return 0; @@ -800,10 +859,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_EQ: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false - case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 @@ -851,9 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_SLT: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false - case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 @@ -1438,6 +1490,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } + + // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + + // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); + } } // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) @@ -1975,7 +2039,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } } - + + // or(sext(A), B) -> A ? -1 : B where A is an i1 + // or(A, sext(B)) -> B ? -1 : A where B is an i1 + if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); + if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); + // Note: If we've gotten to the point of visiting the outer OR, then the // inner one couldn't be simplified. 
If it was a constant, then it won't // be simplified by a later pass either, so we try swapping the inner/outer diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0e46450..726105f 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -475,7 +475,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } break; - case Intrinsic::umul_with_overflow: + case Intrinsic::umul_with_overflow: { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // Get the largest possible values for each operand. + APInt LHSMax = ~LHSKnownZero; + APInt RHSMax = ~RHSKnownZero; + + // If multiplying the maximum values does not overflow then we can turn + // this into a plain NUW mul. + bool Overflow; + LHSMax.umul_ov(RHSMax, Overflow); + if (!Overflow) { + Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow"); + Constant *V[] = { + UndefValue::get(LHS->getType()), + Builder->getFalse() + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, Mul, 0); + } + } // FALL THROUGH case Intrinsic::smul_with_overflow: // Canonicalize constants into the RHS. if (isa<Constant>(II->getArgOperand(0)) && @@ -508,11 +537,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: - case Intrinsic::x86_sse_loadu_ps: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse2_loadu_dq: - // Turn PPC lvx -> load if the pointer is known aligned. - // Turn X86 loadups -> load if the pointer is known aligned. + // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -731,9 +756,13 @@ protected: dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { if (SizeCI->isAllOnesValue()) return true; - if (isString) - return SizeCI->getZExtValue() >= - GetStringLength(CI->getArgOperand(SizeArgOp)); + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. + if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } if (ConstantInt *Arg = dyn_cast<ConstantInt>( CI->getArgOperand(SizeArgOp))) return SizeCI->getZExtValue() >= Arg->getZExtValue(); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index b432641..6f70de8 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -87,10 +87,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the - // same, we open the door to infinite loops of various kinds. (A reference - // from a dbg.declare doesn't count as a use for this purpose.) 
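The InstCombine changes above rest on a few small integer identities: the sign-bit folds added to FoldAndOfICmps and FoldOrOfICmps, the empty ConstantRange intersection fold, and the known-bits argument that lets umul.with.overflow degrade to a plain NUW multiply. They are easy to confirm by brute force; a standalone sketch over 8-bit values (not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a < 128; ++a)
    for (int b = -128; b < 128; ++b) {
      int8_t A = (int8_t)a, B = (int8_t)b;
      // (icmp slt A, 0) & (icmp slt B, 0)   -->  (icmp slt (A&B), 0)
      assert(((A < 0) && (B < 0)) == ((int8_t)(A & B) < 0));
      // (icmp sgt A, -1) & (icmp sgt B, -1) -->  (icmp sgt (A|B), -1)
      assert(((A > -1) && (B > -1)) == ((int8_t)(A | B) > -1));
      // ...and the dual folds added to FoldOrOfICmps:
      assert(((A < 0) || (B < 0)) == ((int8_t)(A | B) < 0));
      assert(((A > -1) || (B > -1)) == ((int8_t)(A & B) > -1));
    }

  // Empty range intersection: (x >s 5) & (x <s 3) can never hold, so the
  // new ConstantRange check folds the whole 'and' to false.
  for (int x = -128; x < 128; ++x)
    assert(!((x > 5) && (x < 3)));

  // umul.with.overflow: when known zero bits bound both operands (here,
  // below 16), the widest possible 8-bit product is 15*15 = 225 <= 255,
  // so the overflow bit is provably false and a plain nuw mul suffices.
  for (unsigned x = 0; x < 16; ++x)
    for (unsigned y = 0; y < 16; ++y)
      assert(x * y <= 255);
  return 0;
}

The PromoteCastOfAllocation hunk then resumes.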
- if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) && - CastElTyAlign == AllocElTyAlign) return 0; + // same, we open the door to infinite loops of various kinds. + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); @@ -128,15 +126,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, New->setAlignment(AI.getAlignment()); New->takeName(&AI); - // If the allocation has one real use plus a dbg.declare, just remove the - // declare. - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) { - EraseInstFromFunction(*(Instruction*)DI); - } // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it // will die soon. - else if (!AI.hasOneUse()) { + if (!AI.hasOneUse()) { // New is the allocation instruction, pointer typed. AI is the original // allocation instruction, also pointer typed. Thus, cast to use is BitCast. Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); @@ -203,7 +196,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, } case Instruction::PHI: { PHINode *OPN = cast<PHINode>(I); - PHINode *NPN = PHINode::Create(Ty); + PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); @@ -883,6 +876,102 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return 0; } +/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations +/// in order to eliminate the icmp. +Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { + Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); + ICmpInst::Predicate Pred = ICI->getPredicate(); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative + // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive + if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) || + (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) { + + Value *Sh = ConstantInt::get(Op0->getType(), + Op0->getType()->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); + + if (Pred == ICmpInst::ICMP_SGT) + In = Builder->CreateNot(In, In->getName()+".not"); + return ReplaceInstUsesWith(CI, In); + } + + // If we know that only one bit of the LHS of the icmp can be set and we + // have an equality comparison with zero or a power of 2, we can transform + // the icmp and sext into bitwise/integer operations. + if (ICI->hasOneUse() && + ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ + unsigned BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { + Value *In = ICI->getOperand(0); + + // If the icmp tests for a known zero bit we can constant fold it. + if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) { + Value *V = Pred == ICmpInst::ICMP_NE ? 
+ ConstantInt::getAllOnesValue(CI.getType()) : + ConstantInt::getNullValue(CI.getType()); + return ReplaceInstUsesWith(CI, V); + } + + if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) { + // sext ((x & 2^n) == 0) -> (x >> n) - 1 + // sext ((x & 2^n) != 2^n) -> (x >> n) - 1 + unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); + // Perform a right shift to place the desired bit in the LSB. + if (ShiftAmt) + In = Builder->CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // At this point "In" is either 1 or 0. Subtract 1 to turn + // {1, 0} -> {0, -1}. + In = Builder->CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); + } else { + // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 + // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 + unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); + // Perform a left shift to place the desired bit in the MSB. + if (ShiftAmt) + In = Builder->CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // Distribute the bit over the whole bit width. + In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), + BitWidth - 1), "sext"); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/); + } + } + } + + // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. + if (const VectorType *VTy = dyn_cast<VectorType>(CI.getType())) { + if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && + Op0->getType() == CI.getType()) { + const Type *EltTy = VTy->getElementType(); + + // splat the shift constant to a constant vector. + Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit"); + return ReplaceInstUsesWith(CI, In); + } + } + + return 0; +} + /// CanEvaluateSExtd - Return true if we can take the specified value /// and return it as type Ty without inserting any new casts and without /// changing the value of the common low bits. This is used by code that tries @@ -1006,44 +1095,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); return BinaryOperator::CreateAShr(Res, ShAmt); } - - - // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed - // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed - { - ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS; - if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) { - // sext (x <s 0) to i32 --> x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. - if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) || - (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) { - Value *Sh = ConstantInt::get(CmpLHS->getType(), - CmpLHS->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit"); - if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); - - if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In, In->getName()+".not"); - return ReplaceInstUsesWith(CI, In); - } - } - } - // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed. 
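transformSExtICmp, added above, centralizes the sext(icmp) patterns, and the old scalar copy in visitSExt is deleted here (the vector copy follows below). The shift tricks it relies on can be checked in plain C++, assuming an arithmetic right shift for negative signed values and two's-complement narrowing, both implementation-defined in C++ but universal in practice:

#include <cassert>
#include <cstdint>

static int32_t sext_of(bool b) { return b ? -1 : 0; }

int main() {
  for (int64_t i = INT32_MIN; i <= INT32_MAX; i += 9973) {
    int32_t x = (int32_t)i;
    assert(sext_of(x < 0)  == (x >> 31));   // sext(x <s 0)  --> ashr x, 31
    assert(sext_of(x > -1) == ~(x >> 31));  // sext(x >s -1) --> not(ashr x, 31)
  }

  // When known bits prove only bit n of x can possibly be set:
  const unsigned n = 5;
  const uint32_t vals[] = {0u, 1u << n};
  for (unsigned idx = 0; idx < 2; ++idx) {
    uint32_t x = vals[idx];
    // sext((x & 2^n) == 0)  -->  (x >>u n) - 1
    assert(sext_of((x & (1u << n)) == 0) == (int32_t)((x >> n) - 1));
    // sext((x & 2^n) != 0)  -->  (x <<u (31-n)) >>s 31
    assert(sext_of((x & (1u << n)) != 0) == ((int32_t)(x << (31 - n)) >> 31));
  }
  return 0;
}

The deletion of the old vector special case continues below.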
- if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) { - ICmpInst::Predicate Pred; Value *CmpLHS; - if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) { - if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) { - const Type *EltTy = VTy->getElementType(); - - // splat the shift constant to a constant vector. - Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit"); - return ReplaceInstUsesWith(CI, In); - } - } - } + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) + return transformSExtICmp(ICI, CI); // If the input is a shl/ashr pair of a same constant, then this is a sign // extension from a smaller value. If we could trust arbitrary bitwidth diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 999de34..bb9b88b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -699,7 +699,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, - // so the values can never be equal. Similiarly for all other "or equals" + // so the values can never be equal. Similarly for all other "or equals" // operators. // (X+1) <u X --> X >u (MAXUINT-1) --> X == 255 @@ -1289,13 +1289,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: - // Only handle equality comparisons of shift-by-constant. - if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) - if (Instruction *Res = FoldICmpShrCst(ICI, cast<BinaryOperator>(LHSI), - ShAmt)) + case Instruction::AShr: { + // Handle equality comparisons of shift-by-constant. + BinaryOperator *BO = cast<BinaryOperator>(LHSI); + if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) { + if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) return Res; + } + + // Handle exact shr's. 
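A note on the 'exact' flag used by the fold guarded below: exact lshr/ashr promises that no set bits are shifted out, i.e. the operand is a multiple of 2^shift, so the shifted value is zero exactly when the operand is. A quick check under that precondition:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned n = 3;
  for (uint32_t k = 0; k < 1000; ++k) {
    uint32_t x = k << n;                  // establishes the 'exact' guarantee
    assert(((x >> n) == 0) == (x == 0));  // icmp eq (lshr exact x, n), 0
  }                                       //   -->  icmp eq x, 0
  return 0;
}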
+ if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { + if (RHSV.isMinValue()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); + } break; + } case Instruction::SDiv: case Instruction::UDiv: @@ -1376,9 +1384,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) + if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { + if (BO->hasOneUse()) { Value *Neg = Builder->CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(ICI.getPredicate(), BOp0, Neg); @@ -1855,11 +1863,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_SLT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()+1)); case ICmpInst::ICMP_UGE: - assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_UGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); case ICmpInst::ICMP_SGE: - assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_SGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); } @@ -1907,18 +1915,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // that code below can assume that Min != Max. if (!isa<Constant>(Op0) && Op0Min == Op0Max) return new ICmpInst(I.getPredicate(), - ConstantInt::get(I.getContext(), Op0Min), Op1); + ConstantInt::get(Op0->getType(), Op0Min), Op1); if (!isa<Constant>(Op1) && Op1Min == Op1Max) return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(I.getContext(), Op1Min)); + ConstantInt::get(Op1->getType(), Op1Min)); // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. + // simplify this comparison. For example, (x&4) < 8 is always true. switch (I.getPredicate()) { default: llvm_unreachable("Unknown icmp opcode!"); case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check @@ -1955,7 +1963,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); // If all bits are known zero except for one, then we know at most one // bit is set. 
If the comparison is against zero, then this is a check @@ -1992,9 +2000,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { @@ -2010,9 +2018,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_UGT: if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -2029,9 +2037,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_SLT: if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { @@ -2042,9 +2050,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_SGT: if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -2057,30 +2065,30 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_SGE: assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!"); if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_SLE: assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!"); if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); 
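The min/max pruning in this switch works from value ranges derived from known bits, as in the '(x&4) < 8 is always true' example in the comment above. A tiny brute-force confirmation of that example (Op0Max = 4 against Op1Min = 8):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 4096; ++x)
    assert((x & 4u) < 8u);   // max(x & 4) == 4, which is already < 8
  return 0;
}

The remaining predicate cases of the switch continue below.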
if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_UGE: assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!"); if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_ULE: assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!"); if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; } @@ -2306,6 +2314,35 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO0->hasOneUse() && BO1->hasOneUse()) return new ICmpInst(Pred, D, B); + BinaryOperator *SRem = NULL; + // icmp (srem X, Y), Y + if (BO0 && BO0->getOpcode() == Instruction::SRem && + Op1 == BO0->getOperand(1)) + SRem = BO0; + // icmp Y, (srem X, Y) + else if (BO1 && BO1->getOpcode() == Instruction::SRem && + Op0 == BO1->getOperand(1)) + SRem = BO1; + if (SRem) { + // We don't check hasOneUse to avoid increasing register pressure because + // the value we use is the same value this instruction was already using. + switch (SRem == BO0 ? 
ICmpInst::getSwappedPredicate(Pred) : Pred) { + default: break; + case ICmpInst::ICMP_EQ: + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + case ICmpInst::ICMP_NE: + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), + Constant::getAllOnesValue(SRem->getType())); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), + Constant::getNullValue(SRem->getType())); + } + } + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() && BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) { @@ -2356,6 +2393,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } break; + case Instruction::UDiv: + case Instruction::LShr: + if (I.isSigned()) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!BO0->isExact() && !BO1->isExact()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + case Instruction::Shl: { + bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); + bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && I.isSigned()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + } } } } @@ -2425,9 +2483,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (Op0->hasOneUse() && Op1->hasOneUse() && - match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { Value *X = 0, *Y = 0, *Z = 0; if (A == C) { @@ -2448,6 +2505,32 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return &I; } } + + // Transform "icmp eq (trunc (lshr X, ShAmt)), Cst1" into + // "icmp eq (and X, mask), (Cst1 << ShAmt)" + uint64_t ShAmt = 0; + ConstantInt *Cst1; + if (Op0->hasOneUse() && + match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), + m_ConstantInt(ShAmt))))) && + match(Op1, m_ConstantInt(Cst1)) && + // Only do this when A has multiple uses. This is most important to do + // when it exposes other optimizations. + !A->hasOneUse()) { + unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits(); + + if (ShAmt < ASize) { + APInt MaskV = + APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); + MaskV <<= ShAmt; + + APInt CmpV = Cst1->getValue().zext(ASize); + CmpV <<= ShAmt; + + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); + return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); + } + } } { @@ -2704,6 +2787,42 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (Constant *RHSC = dyn_cast<Constant>(Op1)) { if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) switch (LHSI->getOpcode()) { + case Instruction::FPExt: { + // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless + FPExtInst *LHSExt = cast<FPExtInst>(LHSI); + ConstantFP *RHSF = dyn_cast<ConstantFP>(RHSC); + if (!RHSF) + break; + + // We can't convert a PPC double double. + if (RHSF->getType()->isPPC_FP128Ty()) + break; + + const fltSemantics *Sem; + // FIXME: This shouldn't be here.
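+      // (Illustration, editor's note, not part of the original patch: for +      // "fcmp olt (fpext float %x to double), 2.5" the constant 2.5 converts +      // to float without loss, so the fold yields "fcmp olt float %x, 2.5"; +      // a constant such as 0.1 has no exact float representation, Lossy is +      // set below, and the compare is left alone.)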
+ if (LHSExt->getSrcTy()->isFloatTy()) + Sem = &APFloat::IEEEsingle; + else if (LHSExt->getSrcTy()->isDoubleTy()) + Sem = &APFloat::IEEEdouble; + else if (LHSExt->getSrcTy()->isFP128Ty()) + Sem = &APFloat::IEEEquad; + else if (LHSExt->getSrcTy()->isX86_FP80Ty()) + Sem = &APFloat::x87DoubleExtended; + else + break; + + bool Lossy; + APFloat F = RHSF->getValueAPF(); + F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); + + // Avoid lossy conversions and denormals. + if (!Lossy && + F.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + ConstantFP::get(RHSC->getContext(), F)); + break; + } case Instruction::PHI: // Only fold fcmp into the PHI if the phi and fcmp are in the same // block. If in the same block, we're encouraging jump threading. If @@ -2742,6 +2861,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } + case Instruction::FSub: { + // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C + Value *Op; + if (match(LHSI, m_FNeg(m_Value(Op)))) + return new FCmpInst(I.getSwappedPredicate(), Op, + ConstantExpr::getFNeg(RHSC)); + break; + } case Instruction::Load: if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { @@ -2755,5 +2882,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } } + // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y + Value *X, *Y; + if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) + return new FCmpInst(I.getSwappedPredicate(), X, Y); + + // fcmp (fpext x), (fpext y) -> fcmp x, y + if (FPExtInst *LHSExt = dyn_cast<FPExtInst>(Op0)) + if (FPExtInst *RHSExt = dyn_cast<FPExtInst>(Op1)) + if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + RHSExt->getOperand(0)); + return Changed ? &I : 0; } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 78ff734..432adc9 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -364,34 +364,12 @@ static bool equivalentAddressValues(Value *A, Value *B) { return false; } -// If this instruction has two uses, one of which is a llvm.dbg.declare, -// return the llvm.dbg.declare. -DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { - if (!V->hasNUses(2)) - return 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U)) - return DI; - if (isa<BitCastInst>(U) && U->hasOneUse()) { - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin())) - return DI; - } - } - return 0; -} - Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. - // If the RHS is an alloca with a two uses, the other one being a - // llvm.dbg.declare, zapify the store and the declare, making the - // alloca dead. We must do this to prevent declares from affecting - // codegen. 
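Aside (not part of the patch): the symmetric fcmp folds added above, "fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y" and the fpext/fpext variant, rest on IEEE-754 negation being exact and merely mirroring the ordering. A minimal C++ spot-check of the predicate-swap identity, a sketch using only the standard library:

#include <cassert>
#include <cmath>

int main() {
  const double vals[] = {-2.5, -0.0, 0.0, 1.0, INFINITY, NAN};
  for (double a : vals)
    for (double b : vals) {
      // Comparing negated operands matches the swapped comparison on the
      // originals; NaN makes both sides false, so the identity still holds.
      assert((-a < -b) == (b < a));
      assert((-a <= -b) == (b <= a));
    }
  return 0;
}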
if (!SI.isVolatile()) { if (Ptr->hasOneUse()) { if (isa<AllocaInst>(Ptr)) @@ -400,17 +378,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa<AllocaInst>(GEP->getOperand(0))) { if (GEP->getOperand(0)->hasOneUse()) return EraseInstFromFunction(SI); - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { - EraseInstFromFunction(*DI); - return EraseInstFromFunction(SI); - } } } } - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { - EraseInstFromFunction(*DI); - return EraseInstFromFunction(SI); - } } // Attempt to improve the alignment. @@ -621,8 +591,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { - PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge"); - PN->reserveOperandSpace(2); + PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d1a1fd6..57fb08a 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -320,6 +320,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } + // See if we can fold away this div instruction. + if (SimplifyDemandedInstructionBits(I)) + return &I; + // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y Value *X = 0, *Z = 0; if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 @@ -332,6 +336,19 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return 0; } +/// dyn_castZExtVal - Checks if V is a zext or constant that can +/// be truncated to Ty without losing bits. +static Value *dyn_castZExtVal(Value *V, const Type *Ty) { + if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) { + if (Z->getSrcTy() == Ty) + return Z->getOperand(0); + } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) { + if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth()) + return ConstantExpr::getTrunc(C, Ty); + } + return 0; +} + Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -390,6 +407,14 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return SelectInst::Create(Cond, TSI, FSI); } } + + // (zext A) udiv (zext B) --> zext (A udiv B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", + I.isExact()), + I.getType()); + return 0; } @@ -452,27 +477,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - return 0; -} + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + const APFloat &Op1F = Op1C->getValueAPF(); -/// This function implements the transforms on rem instructions that work -/// regardless of the kind of rem instruction it is (urem, srem, or frem). It -/// is used by the visitors to those instructions. 
-/// @brief Transforms common to all three rem instructions -Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa<UndefValue>(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVectorTy()) - return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // If the divisor has an exact multiplicative inverse we can turn the fdiv + // into a cheaper fmul. + APFloat Reciprocal(Op1F.getSemantics()); + if (Op1F.getExactInverse(&Reciprocal)) { + ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Op0, RFP); + } } - if (isa<UndefValue>(Op1)) - return ReplaceInstUsesWith(I, Op1); // X % undef -> undef - - // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) - return &I; return 0; } @@ -484,26 +499,11 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Instruction *common = commonRemTransforms(I)) - return common; - - // X % X == 0 - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // 0 % X == 0 for integer, we don't need to preserve faults! - if (Constant *LHS = dyn_cast<Constant>(Op0)) - if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - // X % 0 == undef, we don't need to preserve faults! 
- if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - - if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) { if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -525,6 +525,9 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyURemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *common = commonIRemTransforms(I)) return common; @@ -552,13 +555,22 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { return SelectInst::Create(Cond, TrueAnd, FalseAnd); } } - + + // (zext A) urem (zext B) --> zext (A urem B) + if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1), + I.getType()); + return 0; } Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifySRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer rem common cases if (Instruction *Common = commonIRemTransforms(I)) return Common; @@ -617,6 +629,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { } Instruction *InstCombiner::visitFRem(BinaryOperator &I) { - return commonRemTransforms(I); -} + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (Value *V = SimplifyFRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + return 0; +} diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 297a18c..abf61bb 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -80,18 +80,16 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = 0, *NewRHS = 0; if (LHSVal == 0) { - NewLHS = PHINode::Create(LHSType, + NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), FirstInst->getOperand(0)->getName() + ".pn"); - NewLHS->reserveOperandSpace(PN.getNumOperands()/2); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewLHS, PN); LHSVal = NewLHS; } if (RHSVal == 0) { - NewRHS = PHINode::Create(RHSType, + NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); - NewRHS->reserveOperandSpace(PN.getNumOperands()/2); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewRHS, PN); RHSVal = NewRHS; @@ -202,11 +200,10 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { if (FixedOperands[i]) continue; // operand doesn't need a phi. 
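Aside (not part of the patch): stepping back to the "(zext A) udiv (zext B) --> zext (A udiv B)" and matching urem narrowing added in InstCombineMulDivRem above, the rewrite is sound because zero-extended operands can never produce a quotient or remainder that needs the wider type. A self-contained editor's spot-check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a : {0u, 1u, 7u, 0xffffffffu})
    for (uint32_t b : {1u, 3u, 0xffffffffu}) {
      // Wide division of zero-extended values...
      uint64_t wide = uint64_t(a) / uint64_t(b);
      // ...equals the narrow division, zero-extended afterwards.
      assert(wide == uint64_t(a / b));
      assert(uint64_t(a) % uint64_t(b) == uint64_t(a % b)); // urem variant
    }
  return 0;
}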
Value *FirstOp = FirstInst->getOperand(i); - PHINode *NewPN = PHINode::Create(FirstOp->getType(), + PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, FirstOp->getName()+".pn"); InsertNewInstBefore(NewPN, PN); - NewPN->reserveOperandSpace(e); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[i] = NewPN; FixedOperands[i] = NewPN; @@ -240,7 +237,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { /// obvious the value of the load is not changed from the point of the load to /// the end of the block it is in. /// -/// Finally, it is safe, but not profitable, to sink a load targetting a +/// Finally, it is safe, but not profitable, to sink a load targeting a /// non-address-taken alloca. Doing so will cause us to not promote the alloca /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { @@ -340,8 +337,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), + PN.getNumIncomingValues(), PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -446,8 +443,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), + PN.getNumIncomingValues(), PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); Value *InVal = FirstInst->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -699,7 +696,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { // Otherwise, Create the new PHI node for this user. - EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); + EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), + PN->getName()+".off"+Twine(Offset), PN); assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); @@ -776,9 +774,6 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - // If LCSSA is around, don't mess with Phi nodes - if (MustPreserveLCSSA) return 0; - if (Value *V = SimplifyInstruction(&PN, TD)) return ReplaceInstUsesWith(PN, V); @@ -826,18 +821,18 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // quick check to see if the PHI node only contains a single non-phi value, if // so, scan to see if the phi cycle is actually equal to that value. { - unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); + unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues(); // Scan for the first non-phi operand. - while (InValNo != NumOperandVals && + while (InValNo != NumIncomingVals && isa<PHINode>(PN.getIncomingValue(InValNo))) ++InValNo; - if (InValNo != NumOperandVals) { - Value *NonPhiInVal = PN.getOperand(InValNo); + if (InValNo != NumIncomingVals) { + Value *NonPhiInVal = PN.getIncomingValue(InValNo); // Scan the rest of the operands to see if there are any conflicts, if so // there is no need to recursively scan other phis. 
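    // (Editor's example: in a cycle such as %p1 = phi i32 [ %p2, %a ], [ 7, %b ]     // and %p2 = phi i32 [ %p1, %c ], [ 7, %d ], the only non-phi incoming value     // is 7, so both phis can be replaced by 7.)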
- for (++InValNo; InValNo != NumOperandVals; ++InValNo) { + for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { Value *OpVal = PN.getIncomingValue(InValNo); if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) break; @@ -846,7 +841,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // If we scanned over all operands, then we have one unique value plus // phi values. Scan PHI nodes to see if they all merge in each other or // the value. - if (InValNo == NumOperandVals) { + if (InValNo == NumIncomingVals) { SmallPtrSet<PHINode*, 16> ValueEqualPHIs; if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) return ReplaceInstUsesWith(PN, NonPhiInVal); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 97abc76..61a433a 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -214,7 +214,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, unsigned OpToFold = 0; if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { + } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { OpToFold = 2; } @@ -227,9 +227,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); InsertNewInstBefore(NewSel, SI); NewSel->takeName(TVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI)) - return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI); + BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), + FalseVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(TVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -243,7 +250,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, unsigned OpToFold = 0; if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { + } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { OpToFold = 2; } @@ -256,9 +263,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); InsertNewInstBefore(NewSel, SI); NewSel->takeName(FVI); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI)) - return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI); + BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), + TrueVal, NewSel); + if (isa<PossiblyExactOperator>(BO)) + BO->setIsExact(FVI_BO->isExact()); + if (isa<OverflowingBinaryOperator>(BO)) { + BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -424,6 +438,19 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, return ReplaceInstUsesWith(SI, TrueVal); /// NOTE: if we wanted to, this is where to detect integer MIN/MAX } + + if (isa<Constant>(CmpRHS)) { + if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { + // Transform (X == C) ? X : Y -> (X == C) ? 
C : Y + SI.setOperand(1, CmpRHS); + Changed = true; + } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) { + // Transform (X != C) ? Y : X -> (X != C) ? Y : C + SI.setOperand(2, CmpRHS); + Changed = true; + } + } + return Changed ? &SI : 0; } @@ -503,9 +530,8 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, if (!IC || !IC->isEquality()) return 0; - if (ConstantInt *C = dyn_cast<ConstantInt>(IC->getOperand(1))) - if (!C->isZero()) - return 0; + if (!match(IC->getOperand(1), m_Zero())) + return 0; ConstantInt *AndRHS; Value *LHS = IC->getOperand(0); diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index a7f8005..811f949 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -644,7 +644,14 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { return &I; } } - + + // (C1 << A) << C2 -> (C1 << C2) << A + Constant *C1, *C2; + Value *A; + if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) && + match(I.getOperand(1), m_Constant(C2))) + return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index bda8cea..6e727ce 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -684,6 +684,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::SRem: if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + // X % -1 demands all the bits because we don't want to introduce + // INT_MIN % -1 (== undef) by accident. + if (Rem->isAllOnesValue()) + break; APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { if (DemandedMask.ult(RA)) // srem won't affect demanded bits @@ -712,6 +716,18 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); } } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. + if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { + APInt Mask2 = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, + Depth+1); + // If it's known zero, our sign bit is also zero. 
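+    // (Editor's example: for i8, -128 srem 16 == 0, so a negative LHS can +    // still produce a result with a clear sign bit; conversely, when the LHS +    // sign bit is known zero the remainder is always non-negative.)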
+ if (LHSKnownZero.isNegative()) + KnownZero |= LHSKnownZero; + } break; case Instruction::URem: { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 5caa12d..ad6a8d0 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -230,8 +230,16 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { ConstantInt::get(Int32Ty, SrcIdx, false)); } + } else if (CastInst *CI = dyn_cast<CastInst>(I)) { + // Canonicalize extractelement(cast) -> cast(extractelement) + // bitcasts can change the number of vector elements and they cost nothing + if (CI->hasOneUse() && EI.hasOneUse() && + (CI->getOpcode() != Instruction::BitCast)) { + Value *EE = Builder->CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + } } - // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) } return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 9100a85..32009c3 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -53,6 +53,7 @@ public: void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { assert(Worklist.empty() && "Worklist must be empty to add initial group"); Worklist.reserve(NumEntries+16); + WorklistMap.resize(NumEntries); DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); for (; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1]; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 37123d0..7a84598 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -76,7 +76,6 @@ INITIALIZE_PASS(InstCombiner, "instcombine", "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } @@ -600,8 +599,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } // Okay, we can do the transformation: create the new PHI node. - PHINode *NewPN = PHINode::Create(I.getType(), ""); - NewPN->reserveOperandSpace(PN->getNumOperands()/2); + PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues(), ""); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); @@ -850,22 +848,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), Indices.end(), GEP.getName()); } - + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - if (StrippedPtr != PtrOp) { - const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + const PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + if (StrippedPtr != PtrOp && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { bool HasZeroPointerIndex = false; if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1))) HasZeroPointerIndex = C->isZero(); - + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... // into : GEP i8* X, ... 
- // + // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { const PointerType *CPTy = cast<PointerType>(PtrOp->getType()); @@ -976,7 +975,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } } - + /// See if we can simplify: /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> @@ -984,12 +983,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { if (TD && - !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { + !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { + // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP)); int64_t Offset = OffsetV->getSExtValue(); - + // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. if (Offset == 0) { @@ -1635,7 +1636,6 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { bool InstCombiner::runOnFunction(Function &F) { - MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); TD = getAnalysisIfAvailable<TargetData>(); @@ -1648,6 +1648,10 @@ bool InstCombiner::runOnFunction(Function &F) { bool EverMadeChange = false; + // Lower dbg.declare intrinsics first, otherwise their value may be clobbered + // by the instcombiner. + EverMadeChange = LowerDbgDeclare(F); + // Iterate while there is work to do. unsigned Iteration = 0; while (DoOneIteration(F, Iteration++)) diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 0ac1cb0..5700ac8 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMInstrumentation EdgeProfiling.cpp + GCOVProfiling.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp PathProfiling.cpp diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp new file mode 100644 index 0000000..2425342 --- /dev/null +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -0,0 +1,638 @@ +//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements GCOV-style profiling. When this pass is run it emits +// "gcno" files next to the existing source, and instruments the code that runs +// to record the edges between blocks that run and emit a complementary "gcda" +// file on exit.
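+// For example, compiling foo.c with this pass produces foo.gcno beside the +// source at compile time, and the instrumented program writes foo.gcda when +// it exits (editor's illustration, inferred from replaceStem() below).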
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "insert-gcov-profiling" + +#include "ProfilingUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/PathV2.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/UniqueVector.h" +#include <string> +#include <utility> +using namespace llvm; + +namespace { + class GCOVProfiler : public ModulePass { + bool runOnModule(Module &M); + public: + static char ID; + GCOVProfiler() + : ModulePass(ID), EmitNotes(true), EmitData(true) { + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } + GCOVProfiler(bool EmitNotes, bool EmitData) + : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData) { + assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "GCOV Profiler"; + } + + private: + // Create the GCNO files for the Module based on DebugInfo. + void emitGCNO(DebugInfoFinder &DIF); + + // Modify the program to track transitions along edges and call into the + // profiling runtime to emit .gcda files when run. + bool emitProfileArcs(DebugInfoFinder &DIF); + + // Get pointers to the functions in the runtime library. + Constant *getStartFileFunc(); + Constant *getIncrementIndirectCounterFunc(); + Constant *getEmitFunctionFunc(); + Constant *getEmitArcsFunc(); + Constant *getEndFileFunc(); + + // Create or retrieve an i32 state value that is used to represent the + // pred block number for certain non-trivial edges. + GlobalVariable *getEdgeStateValue(); + + // Produce a table of pointers to counters, by predecessor and successor + // block number. + GlobalVariable *buildEdgeLookupTable(Function *F, + GlobalVariable *Counter, + const UniqueVector<BasicBlock *> &Preds, + const UniqueVector<BasicBlock *> &Succs); + + // Add the function to write out all our counters to the global destructor + // list. 
+ void insertCounterWriteout(DebugInfoFinder &, + SmallVector<std::pair<GlobalVariable *, + uint32_t>, 8> &); + + bool EmitNotes; + bool EmitData; + + Module *M; + LLVMContext *Ctx; + }; +} + +char GCOVProfiler::ID = 0; +INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", + "Insert instrumentation for GCOV profiling", false, false) + +ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData) { + return new GCOVProfiler(EmitNotes, EmitData); +} + +static DISubprogram findSubprogram(DIScope Scope) { + while (!Scope.isSubprogram()) { + assert(Scope.isLexicalBlock() && + "Debug location not lexical block or subprogram"); + Scope = DILexicalBlock(Scope).getContext(); + } + return DISubprogram(Scope); +} + +namespace { + class GCOVRecord { + protected: + static const char *LinesTag; + static const char *FunctionTag; + static const char *BlockTag; + static const char *EdgeTag; + + GCOVRecord() {} + + void writeBytes(const char *Bytes, int Size) { + os->write(Bytes, Size); + } + + void write(uint32_t i) { + writeBytes(reinterpret_cast<char*>(&i), 4); + } + + // Returns the length measured in 4-byte blocks that will be used to + // represent this string in a GCOV file + unsigned lengthOfGCOVString(StringRef s) { + // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs + // padding out to the next 4-byte word. The length is measured in 4-byte + // words including padding, not bytes of actual string. + return (s.size() + 5) / 4; + } + + void writeGCOVString(StringRef s) { + uint32_t Len = lengthOfGCOVString(s); + write(Len); + writeBytes(s.data(), s.size()); + + // Write 1 to 4 bytes of NUL padding. + assert((unsigned)(4 - (s.size() % 4)) > 0); + assert((unsigned)(4 - (s.size() % 4)) <= 4); + writeBytes("\0\0\0\0", 4 - (s.size() % 4)); + } + + raw_ostream *os; + }; + const char *GCOVRecord::LinesTag = "\0\0\x45\x01"; + const char *GCOVRecord::FunctionTag = "\0\0\0\1"; + const char *GCOVRecord::BlockTag = "\0\0\x41\x01"; + const char *GCOVRecord::EdgeTag = "\0\0\x43\x01"; + + class GCOVFunction; + class GCOVBlock; + + // Constructed only by requesting it from a GCOVBlock, this object stores a + // list of line numbers and a single filename, representing lines that belong + // to the block. + class GCOVLines : public GCOVRecord { + public: + void addLine(uint32_t Line) { + Lines.push_back(Line); + } + + uint32_t length() { + return lengthOfGCOVString(Filename) + 2 + Lines.size(); + } + + private: + friend class GCOVBlock; + + GCOVLines(std::string Filename, raw_ostream *os) + : Filename(Filename) { + this->os = os; + } + + std::string Filename; + SmallVector<uint32_t, 32> Lines; + }; + + // Represent a basic block in GCOV. Each block has a unique number in the + // function, number of lines belonging to each block, and a set of edges to + // other blocks. 
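Aside (not code from the patch): a worked check of the GCOV string layout implemented by lengthOfGCOVString/writeGCOVString above. The length word counts only the payload, string bytes plus NUL padding, measured in 4-byte words; padding always brings the payload to a 4-byte boundary. An editor's standalone sketch:

#include <cassert>
#include <string>

// Mirrors lengthOfGCOVString: payload words for a string of s.size() bytes.
static unsigned gcovStringWords(const std::string &s) {
  return (s.size() + 5) / 4;
}

int main() {
  assert(gcovStringWords("") == 1);      // 4 NUL bytes of padding only
  assert(gcovStringWords("main") == 2);  // "main" plus 4 NUL bytes
  assert(gcovStringWords("abcde") == 2); // 5 bytes plus 3 NUL bytes
  return 0;
}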
+ class GCOVBlock : public GCOVRecord { + public: + GCOVLines &getFile(std::string Filename) { + GCOVLines *&Lines = LinesByFile[Filename]; + if (!Lines) { + Lines = new GCOVLines(Filename, os); + } + return *Lines; + } + + void addEdge(GCOVBlock &Successor) { + OutEdges.push_back(&Successor); + } + + void writeOut() { + uint32_t Len = 3; + for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), + E = LinesByFile.end(); I != E; ++I) { + Len += I->second->length(); + } + + writeBytes(LinesTag, 4); + write(Len); + write(Number); + for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(), + E = LinesByFile.end(); I != E; ++I) { + write(0); + writeGCOVString(I->second->Filename); + for (int i = 0, e = I->second->Lines.size(); i != e; ++i) { + write(I->second->Lines[i]); + } + } + write(0); + write(0); + } + + ~GCOVBlock() { + DeleteContainerSeconds(LinesByFile); + } + + private: + friend class GCOVFunction; + + GCOVBlock(uint32_t Number, raw_ostream *os) + : Number(Number) { + this->os = os; + } + + uint32_t Number; + StringMap<GCOVLines *> LinesByFile; + SmallVector<GCOVBlock *, 4> OutEdges; + }; + + // A function has a unique identifier, a checksum (we leave as zero) and a + // set of blocks and a map of edges between blocks. This is the only GCOV + // object users can construct, the blocks and lines will be rooted here. + class GCOVFunction : public GCOVRecord { + public: + GCOVFunction(DISubprogram SP, raw_ostream *os) { + this->os = os; + + Function *F = SP.getFunction(); + uint32_t i = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + Blocks[BB] = new GCOVBlock(i++, os); + } + ReturnBlock = new GCOVBlock(i++, os); + + writeBytes(FunctionTag, 4); + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + + 1 + lengthOfGCOVString(SP.getFilename()) + 1; + write(BlockLen); + uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); + write(Ident); + write(0); // checksum + writeGCOVString(SP.getName()); + writeGCOVString(SP.getFilename()); + write(SP.getLineNumber()); + } + + ~GCOVFunction() { + DeleteContainerSeconds(Blocks); + delete ReturnBlock; + } + + GCOVBlock &getBlock(BasicBlock *BB) { + return *Blocks[BB]; + } + + GCOVBlock &getReturnBlock() { + return *ReturnBlock; + } + + void writeOut() { + // Emit count of blocks. + writeBytes(BlockTag, 4); + write(Blocks.size() + 1); + for (int i = 0, e = Blocks.size() + 1; i != e; ++i) { + write(0); // No flags on our blocks. + } + + // Emit edges between blocks. + for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) { + GCOVBlock &Block = *I->second; + if (Block.OutEdges.empty()) continue; + + writeBytes(EdgeTag, 4); + write(Block.OutEdges.size() * 2 + 1); + write(Block.Number); + for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { + write(Block.OutEdges[i]->Number); + write(0); // no flags + } + } + + // Emit lines for each block. + for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) { + I->second->writeOut(); + } + } + + private: + DenseMap<BasicBlock *, GCOVBlock *> Blocks; + GCOVBlock *ReturnBlock; + }; +} + +// Replace the stem of a file, or add one if missing. +static std::string replaceStem(std::string OrigFilename, std::string NewStem) { + return (sys::path::stem(OrigFilename) + "." 
+ NewStem).str(); +} + +bool GCOVProfiler::runOnModule(Module &M) { + this->M = &M; + Ctx = &M.getContext(); + + DebugInfoFinder DIF; + DIF.processModule(M); + + if (EmitNotes) emitGCNO(DIF); + if (EmitData) return emitProfileArcs(DIF); + return false; +} + +void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) { + DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; + for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(), + E = DIF.compile_unit_end(); I != E; ++I) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. + + DICompileUnit CU(*I); + raw_fd_ostream *&out = GcnoFiles[CU]; + std::string ErrorInfo; + out = new raw_fd_ostream(replaceStem(CU.getFilename(), "gcno").c_str(), + ErrorInfo, raw_fd_ostream::F_Binary); + out->write("oncg*404MVLL", 12); + } + + for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), + SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { + DISubprogram SP(*SPI); + raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()]; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, os); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + } + } else if (isa<ReturnInst>(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); + } + } + Func.writeOut(); + } + + for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator + I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) { + raw_fd_ostream *&out = I->second; + out->write("\0\0\0\0\0\0\0\0", 8); // EOF + out->close(); + delete out; + } +} + +bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) { + if (DIF.subprogram_begin() == DIF.subprogram_end()) + return false; + + SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> CountersByIdent; + for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), + SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { + DISubprogram SP(*SPI); + Function *F = SP.getFunction(); + if (!F) continue; + + unsigned Edges = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (isa<ReturnInst>(TI)) + ++Edges; + else + Edges += TI->getNumSuccessors(); + } + + const ArrayType *CounterTy = + ArrayType::get(Type::getInt64Ty(*Ctx), Edges); + GlobalVariable *Counters = + new GlobalVariable(*M, CounterTy, false, + GlobalValue::InternalLinkage, + Constant::getNullValue(CounterTy), + "__llvm_gcov_ctr", 0, false, 0); + CountersByIdent.push_back( + std::make_pair(Counters, reinterpret_cast<intptr_t>((MDNode*)SP))); + + UniqueVector<BasicBlock *> ComplexEdgePreds; + UniqueVector<BasicBlock *> ComplexEdgeSuccs; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors) { + IRBuilder<> Builder(TI); + + if (Successors == 1) { + Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { + Value *Sel = Builder.CreateSelect( + BI->getCondition(), + ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), + ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); + SmallVector<Value *, 2> Idx; + Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); + Idx.push_back(Sel); + Value *Counter = Builder.CreateInBoundsGEP(Counters, + Idx.begin(), Idx.end()); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else { + ComplexEdgePreds.insert(BB); + for (int i = 0; i != Successors; ++i) + ComplexEdgeSuccs.insert(TI->getSuccessor(i)); + } + Edge += Successors; + } + } + + if (!ComplexEdgePreds.empty()) { + GlobalVariable *EdgeTable = + buildEdgeLookupTable(F, Counters, + ComplexEdgePreds, ComplexEdgeSuccs); + GlobalVariable *EdgeState = getEdgeStateValue(); + + const Type *Int32Ty = Type::getInt32Ty(*Ctx); + for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { + IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); + Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); + } + for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { + // call runtime to perform increment + IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI()); + Value *CounterPtrArray = + Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, + i * ComplexEdgePreds.size()); + Builder.CreateCall2(getIncrementIndirectCounterFunc(), + EdgeState, CounterPtrArray); + // clear the predecessor number + Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); + } + } + } + + insertCounterWriteout(DIF, CountersByIdent); + + return true; +} + +// All edges with successors that aren't branches are "complex", because it +// requires complex logic to pick which counter to update. +GlobalVariable *GCOVProfiler::buildEdgeLookupTable( + Function *F, + GlobalVariable *Counters, + const UniqueVector<BasicBlock *> &Preds, + const UniqueVector<BasicBlock *> &Succs) { + // TODO: support invoke, threads. We rely on the fact that nothing can modify + // the whole-Module pred edge# between the time we set it and the time we next + // read it. Threads and invoke make this untrue. + + // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. + const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + const ArrayType *EdgeTableTy = ArrayType::get( + Int64PtrTy, Succs.size() * Preds.size()); + + Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; + Constant *NullValue = Constant::getNullValue(Int64PtrTy); + for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i) + EdgeTable[i] = NullValue; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa<ReturnInst>(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) { + for (int i = 0; i != Successors; ++i) { + BasicBlock *Succ = TI->getSuccessor(i); + IRBuilder<> builder(Succ); + Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge + i); + EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + + (Preds.idFor(BB)-1)] = cast<Constant>(Counter); + } + } + Edge += Successors; + } + + GlobalVariable *EdgeTableGV = + new GlobalVariable( + *M, EdgeTableTy, true, GlobalValue::InternalLinkage, + ConstantArray::get(EdgeTableTy, + &EdgeTable[0], Succs.size() * Preds.size()), + "__llvm_gcda_edge_table"); + EdgeTableGV->setUnnamedAddr(true); + return EdgeTableGV; +} + +Constant *GCOVProfiler::getStartFileFunc() { + const Type *Args[] = { Type::getInt8PtrTy(*Ctx) }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_start_file", FTy); +} + +Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { + const Type *Args[] = { + Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor + Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row + }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy); +} + +Constant *GCOVProfiler::getEmitFunctionFunc() { + const Type *Args[] = { Type::getInt32Ty(*Ctx) }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); +} + +Constant *GCOVProfiler::getEmitArcsFunc() { + const Type *Args[] = { + Type::getInt32Ty(*Ctx), // uint32_t num_counters + Type::getInt64PtrTy(*Ctx), // uint64_t *counters + }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); +} + +Constant *GCOVProfiler::getEndFileFunc() { + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_gcda_end_file", FTy); +} + +GlobalVariable *GCOVProfiler::getEdgeStateValue() { + GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred"); + if (!GV) { + GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false, + GlobalValue::InternalLinkage, + ConstantInt::get(Type::getInt32Ty(*Ctx), + 0xffffffff), + "__llvm_gcov_global_state_pred"); + GV->setUnnamedAddr(true); + } + return GV; +} + +void GCOVProfiler::insertCounterWriteout( + DebugInfoFinder &DIF, + SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> &CountersByIdent) { + const FunctionType *WriteoutFTy = + FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *WriteoutF = Function::Create(WriteoutFTy, + GlobalValue::InternalLinkage, + "__llvm_gcov_writeout", M); + WriteoutF->setUnnamedAddr(true); + BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF); + IRBuilder<> Builder(BB); + + Constant *StartFile = getStartFileFunc(); + Constant *EmitFunction = getEmitFunctionFunc(); + Constant *EmitArcs = getEmitArcsFunc(); + Constant *EndFile = getEndFileFunc(); + + for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(), + CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) { + DICompileUnit compile_unit(*CUI); + std::string FilenameGcda = replaceStem(compile_unit.getFilename(), "gcda"); + Builder.CreateCall(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda)); + for (SmallVector<std::pair<GlobalVariable *, uint32_t>, 8>::iterator + I = 
CountersByIdent.begin(), E = CountersByIdent.end(); + I != E; ++I) { + Builder.CreateCall(EmitFunction, ConstantInt::get(Type::getInt32Ty(*Ctx), + I->second)); + GlobalVariable *GV = I->first; + unsigned Arcs = + cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); + Builder.CreateCall2(EmitArcs, + ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), + Builder.CreateConstGEP2_64(GV, 0, 0)); + } + Builder.CreateCall(EndFile); + } + Builder.CreateRetVoid(); + + InsertProfilingShutdownCall(WriteoutF, M); +} diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 96ed4fa..71adc1e 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -23,6 +23,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeEdgeProfilerPass(Registry); initializeOptimalEdgeProfilerPass(Registry); initializePathProfilerPass(Registry); + initializeGCOVProfilerPass(Registry); } /// LLVMInitializeInstrumentation - C binding for diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 829da6b..f76c77e 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This module privides means for calculating a maximum spanning tree for a +// This module provides means for calculating a maximum spanning tree for a // given set of weighted edges. The type parameter T is the type of a node. // //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index c85a1a9..e09f882 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-optimal-edge-profiling" #include "ProfilingUtils.h" +#include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" @@ -26,7 +27,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "MaximumSpanningTree.h" -#include <set> using namespace llvm; STATISTIC(NumEdgesInserted, "The # of edges inserted."); @@ -120,14 +120,14 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { NumEdgesInserted = 0; std::vector<Constant*> Initializer(NumEdges); - Constant* Zero = ConstantInt::get(Int32, 0); - Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); + Constant *Zero = ConstantInt::get(Int32, 0); + Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); // Instrument all of the edges not in MST... unsigned i = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n"); // Calculate a Maximum Spanning Tree with the edge weights determined by // ProfileEstimator. 
ProfileEstimator also assigns weights to the virtual @@ -139,17 +139,17 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { ProfileInfo::EdgeWeights ECs = getAnalysis<ProfileInfo>(*F).getEdgeWeights(F); std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end()); - MaximumSpanningTree<BasicBlock> MST (EdgeVector); - std::stable_sort(MST.begin(),MST.end()); + MaximumSpanningTree<BasicBlock> MST(EdgeVector); + std::stable_sort(MST.begin(), MST.end()); // Check if (0,entry) is not in the MST. If not, instrument the edge // (IncrementCounterInBlock()) and set the counter initially to zero; if // the edge is in the MST the counter is initialised to -1. BasicBlock *entry = &(F->getEntryBlock()); - ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry); + ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry); if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge,entry,i); + printEdgeCounter(edge, entry, i); IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else { @@ -170,9 +170,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // has no successors, the virtual edge (BB,0) is processed. TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 0) { - ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0); + ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0); if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge,BB,i); + printEdgeCounter(edge, BB, i); IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else { @@ -195,11 +195,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // otherwise insert it in the successor block. if (TI->getNumSuccessors() == 1) { // Insert counter at the start of the block - printEdgeCounter(edge,BB,i); + printEdgeCounter(edge, BB, i); IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; } else { // Insert counter at the start of the successor block - printEdgeCounter(edge,Succ,i); + printEdgeCounter(edge, Succ, i); IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted; } Initializer[i++] = (Zero); @@ -212,9 +212,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // Check if the number of edges counted at first was the number of edges we // considered for instrumentation. - assert(i==NumEdges && "the number of edges in counting array is wrong"); + assert(i == NumEdges && "the number of edges in counting array is wrong"); - // Assing the now completely defined initialiser to the array. + // Assign the now completely defined initialiser to the array.
Constant *init = ConstantArray::get(ATy, Initializer); Counters->setInitializer(init); diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index 6449b39..6b3f12d 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -63,7 +63,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Instrumentation.h" -#include <map> #include <vector> #define HASH_THRESHHOLD 100000 @@ -259,7 +258,7 @@ private: }; // --------------------------------------------------------------------------- -// PathProfiler is a module pass which intruments path profiling instructions +// PathProfiler is a module pass which instruments path profiling instructions // --------------------------------------------------------------------------- class PathProfiler : public ModulePass { private: @@ -389,6 +388,9 @@ namespace llvm { // BallLarusEdge << operator overloading raw_ostream& operator<<(raw_ostream& os, + const BLInstrumentationEdge& edge) + LLVM_ATTRIBUTE_USED; + raw_ostream& operator<<(raw_ostream& os, const BLInstrumentationEdge& edge) { os << "[" << edge.getSource()->getName() << " -> " << edge.getTarget()->getName() << "] init: " @@ -929,14 +931,16 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* void PathProfiler::preparePHI(BLInstrumentationNode* node) { BasicBlock* block = node->getBlock(); BasicBlock::iterator insertPoint = block->getFirstNonPHI(); - PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber", + pred_iterator PB = pred_begin(node->getBlock()), + PE = pred_end(node->getBlock()); + PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), + std::distance(PB, PE), "pathNumber", insertPoint ); node->setPathPHI(phi); node->setStartingPathNumber(phi); node->setEndingPathNumber(phi); - for(pred_iterator predIt = pred_begin(node->getBlock()), - end = pred_end(node->getBlock()); predIt != end; predIt++) { + for(pred_iterator predIt = PB; predIt != PE; predIt++) { BasicBlock* pred = (*predIt); if(pred != NULL) diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index b57bbf6..7435bc3 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -110,7 +110,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray, bool beginning) { // Insert the increment after any alloca or PHI instructions... BasicBlock::iterator InsertPos = beginning ? BB->getFirstNonPHI() : - BB->getTerminator(); + BB->getTerminator(); while (isa<AllocaInst>(InsertPos)) ++InsertPos; @@ -121,8 +121,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], - Indices.size()); + ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); // Load, increment and store the value back. 
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); @@ -131,3 +130,41 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, "NewFuncCounter", InsertPos); new StoreInst(NewVal, ElementPtr, InsertPos); } + +void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { + // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those + // types. + const Type *GlobalDtorElems[2] = { + Type::getInt32Ty(Mod->getContext()), + FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() + }; + const StructType *GlobalDtorElemTy = + StructType::get(Mod->getContext(), GlobalDtorElems, false); + + // Construct the new element we'll be adding. + Constant *Elem[2] = { + ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535), + ConstantExpr::getBitCast(Callee, GlobalDtorElems[1]) + }; + + // If llvm.global_dtors exists, make a copy of the things in its list and + // delete it, to replace it with one that has a larger array type. + std::vector<Constant *> dtors; + if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) { + if (ConstantArray *InitList = + dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) { + for (unsigned i = 0, e = InitList->getType()->getNumElements(); + i != e; ++i) + dtors.push_back(cast<Constant>(InitList->getOperand(i))); + } + GlobalDtors->eraseFromParent(); + } + + // Build up llvm.global_dtors with our new item in it. + GlobalVariable *GlobalDtors = new GlobalVariable( + *Mod, ArrayType::get(GlobalDtorElemTy, 1), false, + GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors"); + dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false)); + GlobalDtors->setInitializer(ConstantArray::get( + cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors)); +} diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h index a76e357..09b2217 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.h +++ b/lib/Transforms/Instrumentation/ProfilingUtils.h @@ -18,9 +18,10 @@ #define PROFILINGUTILS_H namespace llvm { + class BasicBlock; class Function; class GlobalValue; - class BasicBlock; + class Module; class PointerType; void InsertProfilingInitCall(Function *MainFn, const char *FnName, @@ -29,6 +30,7 @@ namespace llvm { void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray, bool beginning = true); + void InsertProfilingShutdownCall(Function *Callee, Module *Mod); } #endif diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 106fb8f..fcf914f 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -7,7 +7,6 @@ add_llvm_library(LLVMScalarOpts DCE.cpp DeadStoreElimination.cpp EarlyCSE.cpp - GEPSplitter.cpp GVN.cpp IndVarSimplify.cpp JumpThreading.cpp @@ -27,7 +26,6 @@ add_llvm_library(LLVMScalarOpts Scalar.cpp ScalarReplAggregates.cpp SimplifyCFGPass.cpp - SimplifyHalfPowrLibCalls.cpp SimplifyLibCalls.cpp Sink.cpp TailDuplication.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9536939..0184390 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -47,21 +47,21 @@ using namespace llvm; using namespace llvm::PatternMatch; STATISTIC(NumBlocksElim, "Number of blocks eliminated"); -STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); -STATISTIC(NumGEPsElim, "Number of GEPs converted 
to casts"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " "sunken Cmps"); STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); -STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); -STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); -static cl::opt<bool> -CriticalEdgeSplit("cgp-critical-edge-splitting", - cl::desc("Split critical edges during codegen prepare"), - cl::init(false), cl::Hidden); +static cl::opt<bool> DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); namespace { class CodeGenPrepare : public FunctionPass { @@ -76,15 +76,15 @@ namespace { /// update it. BasicBlock::iterator CurInstIterator; - /// BackEdges - Keep a set of all the loop back edges. - /// - SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges; - - // Keeps track of non-local addresses that have been sunk into a block. This - // allows us to avoid inserting duplicate code for blocks with multiple - // load/stores of the same address. + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. DenseMap<Value*, Value*> SunkAddrs; + /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to + /// be updated. + bool ModifiedDT; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) @@ -98,10 +98,6 @@ namespace { AU.addPreserved<ProfileInfo>(); } - virtual void releaseMemory() { - BackEdges.clear(); - } - private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -113,7 +109,7 @@ namespace { bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); - void findLoopBackEdges(const Function &F); + bool DupRetToEnableTailCallOpts(ReturnInst *RI); }; } @@ -125,40 +121,42 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); } -/// findLoopBackEdges - Do a DFS walk to find loop back edges. -/// -void CodeGenPrepare::findLoopBackEdges(const Function &F) { - SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; - FindFunctionBackedges(F, Edges); - - BackEdges.insert(Edges.begin(), Edges.end()); -} - - bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + ModifiedDT = false; DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); + // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); - // Now find loop back edges, but only if they are being used to decide which - // critical edges to split. 
- if (CriticalEdgeSplit) - findLoopBackEdges(F); - bool MadeChange = true; while (MadeChange) { MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; MadeChange |= OptimizeBlock(*BB); + } EverMadeChange |= MadeChange; } SunkAddrs.clear(); + if (!DisableBranchOpts) { + MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + MadeChange |= ConstantFoldTerminator(BB); + + if (MadeChange) + ModifiedDT = true; + EverMadeChange |= MadeChange; + } + + if (ModifiedDT && DT) + DT->DT->recalculate(F); + return EverMadeChange; } @@ -333,7 +331,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (DT) { + if (DT && !ModifiedDT) { BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); @@ -350,110 +348,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -/// FindReusablePredBB - Check all of the predecessors of the block DestPHI -/// lives in to see if there is a block that we can reuse as a critical edge -/// from TIBB. -static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { - BasicBlock *Dest = DestPHI->getParent(); - - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. - unsigned TIBBEntryNo = 0; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { - BasicBlock *Pred = DestPHI->getIncomingBlock(pi); - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa<DbgInfoIntrinsic>(I)) - I++; - if (&*I != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &Dest->getParent()->getEntryBlock()) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - unsigned PredEntryNo = pi; - - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) { - if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) - TIBBEntryNo = PN->getBasicBlockIndex(TIBB); - TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); - } - - // If the PHI entry doesn't work, we can't use this pred. - if (PN->getIncomingBlock(PredEntryNo) != Pred) - PredEntryNo = PN->getBasicBlockIndex(Pred); - - if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. 
- if (FoundMatch) - return Pred; - } - return 0; -} - - -/// SplitEdgeNicely - Split the critical edge from TI to its specified -/// successor if it will improve codegen. We only do this if the successor has -/// phi nodes (otherwise critical edges are ok). If there is already another -/// predecessor of the succ that is empty (and thus has no phi nodes), use it -/// instead of introducing a new block. -static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, - SmallSet<std::pair<const BasicBlock*, - const BasicBlock*>, 8> &BackEdges, - Pass *P) { - BasicBlock *TIBB = TI->getParent(); - BasicBlock *Dest = TI->getSuccessor(SuccNum); - assert(isa<PHINode>(Dest->begin()) && - "This should only be called if Dest has a PHI!"); - PHINode *DestPHI = cast<PHINode>(Dest->begin()); - - // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) - if (Invoke->getSuccessor(1) == Dest) - return; - - // As a hack, never split backedges of loops. Even though the copy for any - // PHIs inserted on the backedge would be dead for exits from the loop, we - // assume that the cost of *splitting* the backedge would be too high. - if (BackEdges.count(std::make_pair(TIBB, Dest))) - return; - - if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, ReuseBB); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, ReuseBB); - return; - } - - SplitCriticalEdge(TI, SuccNum, P, true); -} - - /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), /// sink it into user blocks to reduce the number of virtual @@ -640,7 +534,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // happens. WeakVH IterHandle(CurInstIterator); - ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT); + ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, + ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -666,6 +561,129 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { return Simplifier.fold(CI, TD); } +/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return +/// instructions to the predecessor to enable tail call optimizations. The +/// case it is currently looking for is: +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// +/// => +/// +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// +bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { + if (!TLI) + return false; + + Value *V = RI->getReturnValue(); + PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; + if (V && !PN) + return false; + + BasicBlock *BB = RI->getParent(); + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). 
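[Editor's note] For the source-level picture behind the IR shown in the comment above: a phi of call results feeding a single ret typically comes from a function that returns the result of one of several calls through a shared return block. A small hypothetical C++ example that lowers to that shape (the callees are declared extern so the calls survive; link against real definitions to run it):

int f0();
int f1();
int f2();

int dispatch(int k) {
  // Each 'return' lowers to a call plus a branch to a common return
  // block with a phi; duplicating the ret into each predecessor, as
  // DupRetToEnableTailCallOpts does, lets the backend emit real tail calls.
  if (k == 0) return f0();
  if (k == 1) return f1();
  return f2();
}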
+ const Function *F = BB->getParent(); + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa<DbgInfoIntrinsic>(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector<CallInst*, 4> TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. + if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet<BasicBlock*, 4> VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI)) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast<CallInst>(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. + unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. + (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. + if (Changed && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + //===----------------------------------------------------------------------===// // Memory Optimization //===----------------------------------------------------------------------===// @@ -701,7 +719,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. 
Value *Consensus = 0; - unsigned NumUses = 0; + unsigned NumUsesConsensus = 0; + bool IsNumUsesConsensusValid = false; SmallVector<Instruction*, 16> AddrModeInsts; ExtAddrMode AddrMode; while (!worklist.empty()) { @@ -728,16 +747,31 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(V, AccessTy,MemoryInst, NewAddrModeInsts, *TLI); - - // Ensure that the obtained addressing mode is equivalent to that obtained - // for all other roots of the PHI traversal. Also, when choosing one - // such root as representative, select the one with the most uses in order - // to keep the cost modeling heuristics in AddressingModeMatcher applicable. - if (!Consensus || NewAddrMode == AddrMode) { - if (V->getNumUses() > NumUses) { + + // This check is broken into two cases with very similar code to avoid using + // getNumUses() as much as possible. Some values have a lot of uses, so + // calling getNumUses() unconditionally caused a significant compile-time + // regression. + if (!Consensus) { + Consensus = V; + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + continue; + } else if (NewAddrMode == AddrMode) { + if (!IsNumUsesConsensusValid) { + NumUsesConsensus = Consensus->getNumUses(); + IsNumUsesConsensusValid = true; + } + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher + // applicable. + unsigned NumUses = V->getNumUses(); + if (NumUses > NumUsesConsensus) { Consensus = V; - NumUses = V->getNumUses(); - AddrMode = NewAddrMode; + NumUsesConsensus = NumUses; AddrModeInsts = NewAddrModeInsts; } continue; @@ -855,11 +889,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + // If we have no uses, recursively delete the value and all dead instructions + // using it. if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. + WeakVH IterHandle(CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + RecursivelyDeleteTriviallyDeadInstructions(Repl); - // This address is now available for reassignment, so erase the table entry; - // we don't want to match some completely different instruction. - SunkAddrs[Addr] = 0; + + if (IterHandle != CurInstIterator) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } else { + // This address is now available for reassignment, so erase the table + // entry; we don't want to match some completely different instruction. + SunkAddrs[Addr] = 0; + } } ++NumMemoryInsts; return true; @@ -1073,6 +1122,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (CallInst *CI = dyn_cast<CallInst>(I)) return OptimizeCallInst(CI); + if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) + return DupRetToEnableTailCallOpts(RI); + return false; } @@ -1080,21 +1132,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { - bool MadeChange = false; - - // Split all critical edges where the dest block has a PHI. 
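[Editor's note] The consensus rewrite above is a small compile-time trick worth spelling out: Value::getNumUses() walks the entire use list, so the pass now computes it only once a second candidate with a matching addressing mode appears, and caches the result. The same lazy-compute-and-cache pattern in miniature, as a standalone sketch rather than LLVM code:

#include <cstdio>
#include <vector>

static int countUses(const std::vector<int> &uses) {
  std::printf("expensive walk\n");  // stands in for walking a use list
  return (int)uses.size();
}

int main() {
  std::vector<int> consensusUses(5, 0), candidateUses(1000, 0);
  int numUsesConsensus = 0;
  bool isNumUsesConsensusValid = false;

  // Only pay for the walk when a real comparison is needed.
  if (!isNumUsesConsensusValid) {
    numUsesConsensus = countUses(consensusUses);
    isNumUsesConsensusValid = true;
  }
  if (countUses(candidateUses) > numUsesConsensus)
    std::printf("candidate becomes the new consensus\n");
}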
- if (CriticalEdgeSplit) { - TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); - } - } - } - SunkAddrs.clear(); + bool MadeChange = false; CurInstIterator = BB.begin(); for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; ) diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index be12973..e275268 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index dbb68f3..8dbcc23 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -23,7 +23,6 @@ #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 867a06a..53e4640 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -340,24 +340,35 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - int64_t Off1 = 0, Off2 = 0; - const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD); + int64_t EarlierOff = 0, LaterOff = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) return false; - - // Otherwise, we might have a situation like: - // store i16 -> P + 1 Byte - // store i32 -> P - // In this case, we see if the later store completely overlaps all bytes - // stored by the previous store. - if (Off1 < Off2 || // Earlier starts before Later. - Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later. - return false; - // Otherwise, we have complete overlap. - return true; + + // The later store completely overlaps the earlier store if: + // + // 1. Both start at the same offset and the later one's size is greater than + // or equal to the earlier one's, or + // + // |--earlier--| + // |-- later --| + // + // 2. The earlier store has an offset greater than the later offset, but which + // still lies completely within the later store. + // + // |--earlier--| + // |----- later ------| + // + // We have to be careful here as *Off is signed while *.Size is unsigned. + if (EarlierOff >= LaterOff && + uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) + return true; + + // Otherwise, they don't completely overlap. + return false; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. 
a noop copy of a @@ -474,7 +485,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // away the store and we bail out. However, if we depend on on something // that overwrites the memory location we *can* potentially optimize it. // - // Find out what memory location the dependant instruction stores. + // Find out what memory location the dependent instruction stores. Instruction *DepWrite = InstDep.getInst(); AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); // If we didn't get a useful location, or if it isn't a size, bail out. @@ -631,28 +642,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (AA->doesNotAccessMemory(CS)) continue; - unsigned NumModRef = 0, NumOther = 0; - // If the call might load from any of our allocas, then any store above // the call is live. SmallVector<Value*, 8> LiveAllocas; for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { - // If we detect that our AA is imprecise, it's not worth it to scan the - // rest of the DeadPointers set. Just assume that the AA will return - // ModRef for everything, and go ahead and bail out. - if (NumModRef >= 16 && NumOther == 0) - return MadeChange; - // See if the call site touches it. AliasAnalysis::ModRefResult A = AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); - if (A == AliasAnalysis::ModRef) - ++NumModRef; - else - ++NumOther; - if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) LiveAllocas.push_back(*I); } diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp deleted file mode 100644 index 4c3d188..0000000 --- a/lib/Transforms/Scalar/GEPSplitter.cpp +++ /dev/null @@ -1,83 +0,0 @@ -//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This function breaks GEPs with more than 2 non-zero operands into smaller -// GEPs each with no more than 2 non-zero operands. This exposes redundancy -// between GEPs with common initial operand sequences. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "split-geps" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - class GEPSplitter : public FunctionPass { - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - public: - static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(ID) { - initializeGEPSplitterPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char GEPSplitter::ID = 0; -INITIALIZE_PASS(GEPSplitter, "split-geps", - "split complex GEPs into simple GEPs", false, false) - -FunctionPass *llvm::createGEPSplitterPass() { - return new GEPSplitter(); -} - -bool GEPSplitter::runOnFunction(Function &F) { - bool Changed = false; - - // Visit each GEP instruction. - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; ) - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) { - unsigned NumOps = GEP->getNumOperands(); - // Ignore GEPs which are already simple. 
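[Editor's note] The rewritten complete-overwrite check in the DeadStoreElimination hunk above hinges on mixing a signed offset with unsigned sizes: EarlierOff - LaterOff is only meaningful once EarlierOff >= LaterOff is known, and the addition must happen in an unsigned type so it cannot wrap. A standalone restatement of just that predicate, with the commented scenarios as checks:

#include <cassert>
#include <cstdint>

// True if [EarlierOff, EarlierOff+EarlierSize) lies entirely within
// [LaterOff, LaterOff+LaterSize). Offsets are signed, sizes unsigned.
static bool laterCompletelyOverwrites(int64_t EarlierOff, uint64_t EarlierSize,
                                      int64_t LaterOff, uint64_t LaterSize) {
  return EarlierOff >= LaterOff &&
         uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize;
}

int main() {
  // |--earlier--|        same start, later at least as large
  // |-- later --|
  assert(laterCompletelyOverwrites(0, 4, 0, 4));
  //    |--earlier--|     earlier nested inside later
  // |----- later ------|
  assert(laterCompletelyOverwrites(2, 2, 0, 8));
  // earlier starts before later: not a complete overwrite
  assert(!laterCompletelyOverwrites(-1, 4, 0, 4));
}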
- if (NumOps <= 2) - continue; - bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) && - cast<ConstantInt>(GEP->getOperand(1))->isZero(); - if (NumOps == 3 && FirstIndexIsZero) - continue; - // The first index is special and gets expanded with a 2-operand GEP - // (unless it's zero, in which case we can skip this). - Value *NewGEP = FirstIndexIsZero ? - GEP->getOperand(0) : - GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1), - "tmp", GEP); - // All remaining indices get expanded with a 3-operand GEP with zero - // as the second operand. - Value *Idxs[2]; - Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0); - for (unsigned i = 2; i != NumOps; ++i) { - Idxs[1] = GEP->getOperand(i); - NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP); - } - GEP->replaceAllUsesWith(NewGEP); - GEP->eraseFromParent(); - Changed = true; - } - - return Changed; -} - -void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a0123f5..efecb97 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -63,50 +63,48 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); namespace { struct Expression { uint32_t opcode; - const Type* type; + const Type *type; SmallVector<uint32_t, 4> varargs; - Expression() { } - Expression(uint32_t o) : opcode(o) { } + Expression(uint32_t o = ~2U) : opcode(o) { } bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; - else if (opcode == ~0U || opcode == ~1U) + if (opcode == ~0U || opcode == ~1U) return true; - else if (type != other.type) + if (type != other.type) return false; - else if (varargs != other.varargs) + if (varargs != other.varargs) return false; return true; } }; class ValueTable { - private: - DenseMap<Value*, uint32_t> valueNumbering; - DenseMap<Expression, uint32_t> expressionNumbering; - AliasAnalysis* AA; - MemoryDependenceAnalysis* MD; - DominatorTree* DT; - - uint32_t nextValueNumber; - - Expression create_expression(Instruction* I); - uint32_t lookup_or_add_call(CallInst* C); - public: - ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value *V); - uint32_t lookup(Value *V) const; - void add(Value *V, uint32_t num); - void clear(); - void erase(Value *v); - void setAliasAnalysis(AliasAnalysis* A) { AA = A; } - AliasAnalysis *getAliasAnalysis() const { return AA; } - void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } - void setDomTree(DominatorTree* D) { DT = D; } - uint32_t getNextUnusedValueNumber() { return nextValueNumber; } - void verifyRemoved(const Value *) const; + DenseMap<Value*, uint32_t> valueNumbering; + DenseMap<Expression, uint32_t> expressionNumbering; + AliasAnalysis *AA; + MemoryDependenceAnalysis *MD; + DominatorTree *DT; + + uint32_t nextValueNumber; + + Expression create_expression(Instruction* I); + uint32_t lookup_or_add_call(CallInst* C); + public: + ValueTable() : nextValueNumber(1) { } + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); + void clear(); + void erase(Value *v); + void setAliasAnalysis(AliasAnalysis* A) { AA = A; } + AliasAnalysis *getAliasAnalysis() const { return AA; } + void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } + void setDomTree(DominatorTree* D) { DT = D; } + uint32_t getNextUnusedValueNumber() { return nextValueNumber; } + void verifyRemoved(const Value *) const; }; } @@ -364,14 +362,14 @@ 
uint32_t ValueTable::lookup(Value *V) const { return VI->second; } -/// clear - Remove all entries from the ValueTable +/// clear - Remove all entries from the ValueTable. void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); nextValueNumber = 1; } -/// erase - Remove a value from the value numbering +/// erase - Remove a value from the value numbering. void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -392,20 +390,11 @@ void ValueTable::verifyRemoved(const Value *V) const { namespace { class GVN : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { - initializeGVNPass(*PassRegistry::getPassRegistry()); - } - - private: bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; - const TargetData* TD; - + const TargetData *TD; + ValueTable VN; /// LeaderTable - A mapping from value numbers to lists of Value*'s that @@ -418,17 +407,39 @@ namespace { DenseMap<uint32_t, LeaderTableEntry> LeaderTable; BumpPtrAllocator TableAllocator; + SmallVector<Instruction*, 8> InstrsToErase; + public: + static char ID; // Pass identification, replacement for typeid + explicit GVN(bool noloads = false) + : FunctionPass(ID), NoLoads(noloads), MD(0) { + initializeGVNPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + /// markInstructionForDeletion - This removes the specified instruction from + /// our various maps and marks it for deletion. + void markInstructionForDeletion(Instruction *I) { + VN.erase(I); + InstrsToErase.push_back(I); + } + + const TargetData *getTargetData() const { return TD; } + DominatorTree &getDominatorTree() const { return *DT; } + AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } + MemoryDependenceAnalysis &getMemDep() const { return *MD; } + private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { - LeaderTableEntry& Curr = LeaderTable[N]; + LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; Curr.BB = BB; return; } - LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>(); + LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>(); Node->Val = V; Node->BB = BB; Node->Next = Curr.Next; @@ -474,19 +485,17 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addPreserved<AliasAnalysis>(); } + // Helper fuctions // FIXME: eliminate or document these better - bool processLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); - bool processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase); - bool processNonLocalLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); + bool processLoad(LoadInst *L); + bool processInstruction(Instruction *I); + bool processNonLocalLoad(LoadInst *L); bool processBlock(BasicBlock *BB); - void dump(DenseMap<uint32_t, Value*>& d); + void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); - bool performPRE(Function& F); + bool performPRE(Function &F); Value *findLeader(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -629,17 +638,17 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; + // If this is already the right type, just return it. 
const Type *StoredValTy = StoredVal->getType(); uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); - uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { - if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { - // Pointer to Pointer -> use bitcast. + // Pointer to Pointer -> use bitcast. + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); - } // Convert source pointers to integers, which can be bitcast. if (StoredValTy->isPointerTy()) { @@ -796,6 +805,36 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StorePtr, StoreSize, TD); } +/// AnalyzeLoadFromClobberingLoad - This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, + LoadInst *DepLI, const TargetData &TD){ + // Cannot handle reading from store of first-class aggregate yet. + if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); + int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); + if (R != -1) return R; + + // If we have a load/load clobber and DepLI can be widened to cover this load, + // then we should widen it! + int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); + if (Size == 0) return -1; + + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); +} + + + static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, MemIntrinsic *MI, const TargetData &TD) { @@ -843,9 +882,9 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. +/// that the store provides bits used by the load but the pointers don't +/// mustalias. Check this case to see if there is anything more we can do +/// before we give up. static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, const Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ @@ -881,6 +920,69 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetLoadValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering load. This means +/// that the load *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + GVN &gvn) { + const TargetData &TD = *gvn.getTargetData(); + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. + unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + if (Offset+LoadSize > SrcValSize) { + assert(!SrcVal->isVolatile() && "Cannot widen volatile load!"); + assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load"); + // If we have a load/load clobber and DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! + unsigned NewLoadSize = Offset+LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. + IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + const Type *DestPTy = + IntegerType::get(LoadTy->getContext(), NewLoadSize*8); + DestPTy = PointerType::get(DestPTy, + cast<PointerType>(PtrVal->getType())->getAddressSpace()); + + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlignment()); + + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. + Value *RV = NewLoad; + if (TD.isBigEndian()) + RV = Builder.CreateLShr(RV, + NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + // We would like to use gvn.markInstructionForDeletion here, but we can't + // because the load is already memoized into the leader map table that GVN + // tracks. It is potentially possible to remove the load from the table, + // but then all of the operations based on it would need to be + // rehashed. Just leave the dead load around. + gvn.getMemDep().removeInstruction(SrcVal); + SrcVal = NewLoad; + } + + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); +} + + /// GetMemInstValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, @@ -943,11 +1045,12 @@ struct AvailableValueInBlock { BasicBlock *BB; enum ValType { SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. MemIntrin // A memory intrinsic which is loaded from. }; /// V - The value that is live out of the block. - PointerIntPair<Value *, 1, ValType> Val; + PointerIntPair<Value *, 2, ValType> Val; /// Offset - The byte offset in Val that is interesting for the load query.
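[Editor's note] A standalone model of the widening arithmetic in GetLoadValueForLoad above: the replacement load must cover Offset+LoadSize bytes, rounded up to a power of two, and on a big-endian target the original value sits in the high bits, hence the extra shift before truncating. The helper below is a stand-in for llvm::NextPowerOf2, not the real utility; the byte extraction is shown little-endian with explicit shifts:

#include <cassert>
#include <cstdint>

static unsigned nextPowerOf2(unsigned v) {  // stand-in for llvm::NextPowerOf2
  unsigned p = 1;
  while (p < v) p <<= 1;
  return p;
}

int main() {
  unsigned Offset = 2, LoadSize = 1;        // want 1 byte at offset 2
  unsigned NewLoadSize = nextPowerOf2(Offset + LoadSize);  // 3 -> 4 bytes
  assert(NewLoadSize == 4);

  // Little-endian: the byte at offset 2 of a 32-bit value is bits [16,24).
  uint32_t Wide = 0x11223344;
  uint8_t Byte = uint8_t(Wide >> (8 * Offset));
  assert(Byte == 0x22);
}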
unsigned Offset; @@ -972,37 +1075,69 @@ struct AvailableValueInBlock { return Res; } + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); return Val.getPointer(); } + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast<LoadInst>(Val.getPointer()); + } + MemIntrinsic *getMemIntrinValue() const { - assert(!isSimpleValue() && "Wrong accessor"); + assert(isMemIntrinValue() && "Wrong accessor"); return cast<MemIntrinsic>(Val.getPointer()); } /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(const Type *LoadTy, - const TargetData *TD) const { + Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const { Value *Res; if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { + const TargetData *TD = gvn.getTargetData(); assert(TD && "Need target data to handle type mismatch case"); Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } } else { + const TargetData *TD = gvn.getTargetData(); + assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); } @@ -1010,21 +1145,20 @@ struct AvailableValueInBlock { } }; -} +} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, - const TargetData *TD, - const DominatorTree &DT, - AliasAnalysis *AA) { + GVN &gvn) { // Check for the fully redundant, dominating load case. In this case, we can // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && - DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) - return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD); + gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, + LI->getParent())) + return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); // Otherwise, we have to construct SSA form. 
SmallVector<PHINode*, 8> NewPHIs; @@ -1040,14 +1174,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (SSAUpdate.HasValueForBlock(BB)) continue; - SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD)); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); } // Perform PHI construction. Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. - if (V->getType()->isPointerTy()) + if (V->getType()->isPointerTy()) { + AliasAnalysis *AA = gvn.getAliasAnalysis(); + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); @@ -1059,6 +1195,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) AA->addEscapingUse(P->getOperandUse(2*ii)); } + } return V; } @@ -1071,8 +1208,7 @@ static bool isLifetimeStart(const Instruction *Inst) { /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *LI, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processNonLocalLoad(LoadInst *LI) { // Find the non-local dependencies of the load. SmallVector<NonLocalDepResult, 64> Deps; AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); @@ -1088,7 +1224,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber() && + Deps[0].getResult().getInst()->getParent() == LI->getParent()) { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); @@ -1129,6 +1266,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI != LI && Address && TD) { + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), + LI->getPointerOperand(), + DepLI, *TD); + + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, + Offset)); + continue; + } + } + } // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. @@ -1187,7 +1344,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, continue; } } - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); continue; } @@ -1206,16 +1363,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumGVNLoad; return true; } @@ -1429,22 +1584,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } // Perform PHI construction. 
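[Editor's note] ConstructSSAForLoadSet above leans on the generic SSAUpdater utility: seed it with the value that is live out of each predecessor block, then ask for the value in the load's block and let it insert whatever PHIs are needed. A compressed sketch of that usage, assuming the 2.9-era headers; BB1, BB2, MergeBB and the values are placeholders, not names from this file:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

// V1/V2 are available at the ends of BB1/BB2; produce the value to use
// in MergeBB, creating PHI nodes as required.
static llvm::Value *mergeAvailableValues(llvm::LoadInst *LI,
                                         llvm::BasicBlock *BB1, llvm::Value *V1,
                                         llvm::BasicBlock *BB2, llvm::Value *V2,
                                         llvm::BasicBlock *MergeBB) {
  llvm::SmallVector<llvm::PHINode*, 8> NewPHIs;  // collected for AA updates
  llvm::SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());
  SSAUpdate.AddAvailableValue(BB1, V1);
  SSAUpdate.AddAvailableValue(BB2, V2);
  return SSAUpdate.GetValueInMiddleOfBlock(MergeBB);
}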
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumPRELoad; return true; } /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. -bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processLoad(LoadInst *L) { if (!MD) return false; @@ -1454,8 +1607,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); - // If the value isn't available, don't do anything! - if (Dep.isClobber()) { + // If we have a clobber and target data is around, see if this is a clobber + // that we can fix up through code synthesis. + if (Dep.isClobber() && TD) { // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1467,26 +1621,40 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // completely covers this load. This sort of thing can happen in bitfield // access code. Value *AvailVal = 0; - if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) - if (TD) { - int Offset = AnalyzeLoadFromClobberingStore(L->getType(), - L->getPointerOperand(), - DepSI, *TD); - if (Offset != -1) - AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, - L->getType(), L, *TD); - } + if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, + L->getType(), L, *TD); + } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI == L) + return false; + + int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), + L->getPointerOperand(), + DepLI, *TD); + if (Offset != -1) + AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); + } // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
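[Editor's note] The bitfield-style case named in the comment above (store i32 123, then load an i8 from inside it) is easiest to see with concrete bytes: on a little-endian target the forwarded value is just a shift-and-truncate of the stored integer, which is what GetStoreValueForLoad materializes. A runnable check of that equivalence (little-endian host assumed):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t P = 123;   // store i32 123, i32* %P

  uint8_t B;          // load i8 from (i8*)&P + 1
  std::memcpy(&B, reinterpret_cast<uint8_t*>(&P) + 1, 1);

  // On a little-endian host the loaded byte equals (123 >> 8) & 0xff.
  assert(B == uint8_t(P >> 8));
}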
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { - if (TD) { - int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), - L->getPointerOperand(), - DepMI, *TD); - if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); - } + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); } if (AvailVal) { @@ -1497,14 +1665,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } - + } + + // If the value isn't available, don't do anything! + if (Dep.isClobber()) { DEBUG( - // fast print dep, using operator<< on instruction would be too slow + // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; WriteAsOperand(dbgs(), L); Instruction *I = Dep.getInst(); @@ -1515,7 +1685,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // If it is defined in another block, try harder. if (Dep.isNonLocal()) - return processNonLocalLoad(L, toErase); + return processNonLocalLoad(L); Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { @@ -1542,8 +1712,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(StoredVal); if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1556,7 +1725,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // (depending on its type). if (DepLI->getType() != L->getType()) { if (TD) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), + L, *TD); if (AvailableVal == 0) return false; @@ -1571,8 +1741,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(AvailableVal); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1582,19 +1751,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // intervening stores, for example. if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. 
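[Editor's note] Forwarding from a memset is the simplest of the mem-intrinsic cases handled above: every byte in range is identical, so a load of any width inside the memset region folds to the fill byte splatted across the load width, independent of endianness. A standalone illustration:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t Buf[16];
  std::memset(Buf, 0xAB, sizeof(Buf));  // the clobbering mem intrinsic

  uint32_t V;
  std::memcpy(&V, Buf + 4, sizeof(V));  // load i32 from inside the region

  // The fill byte splatted across the load width, on any endianness.
  assert(V == 0xABABABABu);
}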
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1634,8 +1801,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets -bool GVN::processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processInstruction(Instruction *I) { // Ignore dbg info intrinsics. if (isa<DbgInfoIntrinsic>(I)) return false; @@ -1648,20 +1814,17 @@ bool GVN::processInstruction(Instruction *I, I->replaceAllUsesWith(V); if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(I); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - bool Changed = processLoad(LI, toErase); - - if (!Changed) { - unsigned Num = VN.lookup_or_add(LI); - addToLeaderTable(Num, LI, LI->getParent()); - } + if (processLoad(LI)) + return true; - return Changed; + unsigned Num = VN.lookup_or_add(LI); + addToLeaderTable(Num, LI, LI->getParent()); + return false; } // For conditions branches, we can perform simple conditional propagation on @@ -1720,11 +1883,10 @@ bool GVN::processInstruction(Instruction *I, } // Remove it! - VN.erase(I); I->replaceAllUsesWith(repl); if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } @@ -1781,35 +1943,36 @@ bool GVN::runOnFunction(Function& F) { bool GVN::processBlock(BasicBlock *BB) { - // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and - // incrementing BI before processing an instruction). - SmallVector<Instruction*, 8> toErase; + // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function + // (and incrementing BI before processing an instruction). + assert(InstrsToErase.empty() && + "We expect InstrsToErase to be empty across iterations"); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - ChangedFunction |= processInstruction(BI, toErase); - if (toErase.empty()) { + ChangedFunction |= processInstruction(BI); + if (InstrsToErase.empty()) { ++BI; continue; } // If we need some instructions deleted, do it now. - NumGVNInstr += toErase.size(); + NumGVNInstr += InstrsToErase.size(); // Avoid iterator invalidation. bool AtStart = BI == BB->begin(); if (!AtStart) --BI; - for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), - E = toErase.end(); I != E; ++I) { + for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(), + E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } - toErase.clear(); + InstrsToErase.clear(); if (AtStart) BI = BB->begin(); @@ -1944,11 +2107,11 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. 
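// Illustrative sketch, not part of this patch: the leader-table discipline
// that processInstruction above relies on, reduced to plain C++ with a
// hypothetical Inst type. The first definition of a value number becomes
// the leader; later redundant definitions are only queued for deletion,
// mirroring InstrsToErase, because erasing mid-walk would invalidate the
// block iterator.
#include <map>
#include <vector>
struct InstSketch { unsigned ValNum; };
static std::vector<InstSketch*> numberBlock(std::vector<InstSketch> &Block) {
  std::map<unsigned, InstSketch*> Leader;
  std::vector<InstSketch*> ToErase;
  for (InstSketch &I : Block) {
    auto It = Leader.find(I.ValNum);
    if (It == Leader.end())
      Leader[I.ValNum] = &I;  // first occurrence leads its value number
    else
      ToErase.push_back(&I);  // redundant; erase after the walk
  }
  return ToErase;
}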
- PHINode* Phi = PHINode::Create(CurInst->getType(), + pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); + PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; Phi->addIncoming(predMap[P], P); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 0fb6798..09d569a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -73,6 +73,7 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; + SmallVector<WeakVH, 16> DeadInsts; bool Changed; public: @@ -98,6 +99,7 @@ namespace { } private: + bool isValidRewrite(Value *FromVal, Value *ToVal); void EliminateIVComparisons(); void EliminateIVRemainders(); @@ -134,6 +136,53 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } +/// isValidRewrite - Return true if the SCEV expansion generated by the +/// rewriter can replace the original value. SCEV guarantees that it +/// produces the same value, but the way it is produced may be illegal IR. +/// Ideally, this function will only be called for verification. +bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { + // If an SCEV expression subsumed multiple pointers, its expansion could + // reassociate the GEP changing the base pointer. This is illegal because the + // final address produced by a GEP chain must be inbounds relative to its + // underlying object. Otherwise basic alias analysis, among other things, + // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid + // producing an expression involving multiple pointers. Until then, we must + // bail out here. + // + // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject + // because it understands lcssa phis while SCEV does not. + Value *FromPtr = FromVal; + Value *ToPtr = ToVal; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) { + FromPtr = GEP->getPointerOperand(); + } + if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) { + ToPtr = GEP->getPointerOperand(); + } + if (FromPtr != FromVal || ToPtr != ToVal) { + // Quickly check the common case + if (FromPtr == ToPtr) + return true; + + // SCEV may have rewritten an expression that produces the GEP's pointer + // operand. That's ok as long as the pointer operand has the same base + // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the + // base of a recurrence. This handles the case in which SCEV expansion + // converts a pointer type recurrence into a nonrecurrent pointer base + // indexed by an integer recurrence. + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); + const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); + if (FromBase == ToBase) + return true; + + DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " + << *FromBase << " != " << *ToBase << "\n"); + + return false; + } + return true; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. 
This pass is able to rewrite the exit tests of any loop where the @@ -226,7 +275,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // update the branch to use the new comparison; in the common case this // will make old comparison dead. BI->setCondition(Cond); - RecursivelyDeleteTriviallyDeadInstructions(OrigCond); + DeadInsts.push_back(OrigCond); ++NumLFTR; Changed = true; @@ -304,14 +353,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; - Changed = true; - ++NumReplaced; - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); + if (!isValidRewrite(Inst, ExitVal)) { + DeadInsts.push_back(ExitVal); + continue; + } + Changed = true; + ++NumReplaced; + PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. @@ -366,8 +419,6 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { } void IndVarSimplify::EliminateIVComparisons() { - SmallVector<WeakVH, 16> DeadInsts; - // Look for ICmp users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -399,18 +450,9 @@ void IndVarSimplify::EliminateIVComparisons() { DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); DeadInsts.push_back(ICmp); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } void IndVarSimplify::EliminateIVRemainders() { - SmallVector<WeakVH, 16> DeadInsts; - // Look for SRem and URem users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -466,13 +508,6 @@ void IndVarSimplify::EliminateIVRemainders() { DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); DeadInsts.push_back(Rem); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -491,6 +526,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); + DeadInsts.clear(); Changed = false; // If there are any floating-point recurrences, attempt to @@ -589,9 +625,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ExitingBlock, BI, Rewriter); } - // Rewrite IV-derived expressions. Clears the rewriter cache. + // Rewrite IV-derived expressions. RewriteIVExpressions(L, Rewriter); + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted in the loop below, causing the AssertingVH in the cache to + // trigger. + Rewriter.clear(); + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + // The Rewriter may not be used from this point on. 
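// Illustrative sketch, not part of this patch: the property that the new
// isValidRewrite guard enforces, in plain C++ over a hypothetical GEP
// summary. A SCEV-expanded replacement for a GEP is only accepted when
// both expressions address the same underlying base object; rebasing a
// GEP would break its inbounds contract.
struct GEPSummary { const void *BasePtr; long long ByteOffset; };
static bool isValidRewriteSketch(const GEPSummary &From,
                                 const GEPSummary &To) {
  return From.BasePtr == To.BasePtr; // same base object required
}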
// Loop-invariant instructions in the preheader that aren't used in the @@ -632,7 +680,7 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { if (!isSafe(*I, L, SE)) return false; return true; } - + // A cast is safe if its operand is. if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) return isSafe(C->getOperand(), L, SE); @@ -651,8 +699,6 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { } void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { - SmallVector<WeakVH, 16> DeadInsts; - // Rewrite all induction variable expressions in terms of the canonical // induction variable. // @@ -705,6 +751,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // Now expand it into actual Instructions and patch it into place. Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } // Inform ScalarEvolution that this value is changing. The change doesn't // affect its value, but it does potentially affect which use lists the // value will be on after the replacement, which affects ScalarEvolution's @@ -717,25 +770,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { NewVal->takeName(Op); User->replaceUsesOfWith(Op, NewVal); UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); + ++NumRemoved; Changed = true; // The old value may be dead now. DeadInsts.push_back(Op); } - - // Clear the rewriter cache, because values that are in the rewriter's cache - // can be deleted in the loop below, causing the AssertingVH in the cache to - // trigger. - Rewriter.clear(); - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } /// If there's a single exit block, sink any loop-invariant values that @@ -859,7 +900,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { BinaryOperator *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - + // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); @@ -884,7 +925,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (Compare == 0 || !Compare->hasOneUse() || !isa<BranchInst>(Compare->use_back())) return; - + BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); // We need to verify that the branch actually controls the iteration count @@ -896,8 +937,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { (L->contains(TheBr->getSuccessor(0)) && L->contains(TheBr->getSuccessor(1)))) return; - - + + // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); @@ -905,7 +946,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (ExitValueVal == 0 || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; - + // Find new predicate for integer comparison. 
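// Illustrative recap, not part of this patch: the translation performed by
// the switch below. Once the init, step, and exit values are proven to be
// small signed integers, each ordered/unordered fp predicate maps to its
// signed integer counterpart. A plain-C++ sketch of the mapping for a
// representative subset:
enum class FPPred { OEQ, OLT, OLE };
enum class IntPred { EQ, SLT, SLE, BAD };
static IntPred mapPredicateSketch(FPPred P) {
  switch (P) {
  case FPPred::OEQ: return IntPred::EQ;  // equality survives conversion
  case FPPred::OLT: return IntPred::SLT; // signed, values fit in i32
  case FPPred::OLE: return IntPred::SLE;
  }
  return IntPred::BAD;
}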
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; switch (Compare->getPredicate()) { @@ -923,13 +964,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } - + // We convert the floating point induction variable to a signed i32 value if // we can. This is only safe if the comparison will not overflow in a way // that won't be trapped by the integer equivalent operations. Check for this // now. // TODO: We could use i64 if it is native and the range requires it. - + // The start/stride/exit values must all fit in signed i32. if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) return; @@ -945,59 +986,59 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) return; - + uint32_t Range = uint32_t(ExitValue-InitValue); if (NewPred == CmpInst::ICMP_SLE) { // Normalize SLE -> SLT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; - + } else { // If we have a negative stride, we require the init to be greater than the // exit value and an equality or greater than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) return; - + uint32_t Range = uint32_t(InitValue-ExitValue); if (NewPred == CmpInst::ICMP_SGE) { // Normalize SGE -> SGT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(-IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) return; } - + const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. 
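// Worked instance, not part of this patch, of the equality-exit screen
// above: init = 0, stride = 3, exit = 10 gives Range = 10 and
// Leftover = 10 % 3 = 1. Because Leftover != 0 the IV never lands exactly
// on the exit value; the i32 version would wrap around and eventually
// compare equal where the fp original never would, so the transform bails.
#include <cstdint>
static bool stridedIVLandsOnExit(int32_t InitValue, int32_t ExitValue,
                                 int32_t IncValue) {
  uint32_t Range = uint32_t(ExitValue - InitValue); // positive-stride case
  return Range % uint32_t(IncValue) == 0; // required for ==/!= exit tests
}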
- PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN); + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), PN->getIncomingBlock(IncomingEdge)); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 90094a8..7168177 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -16,6 +16,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" @@ -170,9 +171,9 @@ bool JumpThreading::runOnFunction(Function &F) { Changed = true; continue; } - + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); - + // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. @@ -608,7 +609,7 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { static bool hasAddressTakenAndUsed(BasicBlock *BB) { if (!BB->hasAddressTaken()) return false; - + // If the block has its address taken, it may be a tree of dead constants // hanging off of it. These shouldn't keep the block alive. BlockAddress *BA = BlockAddress::get(BB); @@ -668,6 +669,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { return false; // Must be an invoke. } + // Run constant folding to see if we can reduce the condition to a simple + // constant. + if (Instruction *I = dyn_cast<Instruction>(Condition)) { + Value *SimpleVal = ConstantFoldInstruction(I, TD); + if (SimpleVal) { + I->replaceAllUsesWith(SimpleVal); + I->eraseFromParent(); + Condition = SimpleVal; + } + } + // If the terminator is branching on an undef, we can pick any of the // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { @@ -928,13 +940,14 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); // Create a PHI node at the start of the block for the PRE'd load value. - PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); + pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); + PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", + LoadBB->begin()); PN->takeName(LI); // Insert new entries into the PHI for each predecessor. A single block may // have multiple entries here. - for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; - ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 0786793..93de9cf 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -445,7 +445,8 @@ void LICM::sink(Instruction &I) { // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { - if (!DT->dominates(I.getParent(), ExitBlocks[0])) { + if (!isa<DbgInfoIntrinsic>(I) && + !DT->dominates(I.getParent(), ExitBlocks[0])) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. 
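// Illustrative aside, not part of this patch's code: the new integer
// argument threaded into the PHINode::Create calls throughout this commit
// is a capacity hint, morally the same as reserving a std::vector before
// filling it, so the operand list is allocated once instead of growing
// per addIncoming.
#include <iterator>
#include <vector>
template <typename Iter>
static std::vector<int> collectSketch(Iter B, Iter E) {
  std::vector<int> Vals;
  Vals.reserve(std::distance(B, E)); // one allocation up front
  for (Iter I = B; I != E; ++I)
    Vals.push_back(*I);              // addIncoming analogue
  return Vals;
}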
@@ -742,30 +743,13 @@ void LICM::PromoteAliasSet(AliasSet &AS) { Preheader->getTerminator()); SSA.AddAvailableValue(Preheader, PreheaderLoad); - // Copy any value stored to or loaded from a must-alias of the pointer. - if (PreheaderLoad->getType()->isPointerTy()) { - Value *SomeValue; - if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0])) - SomeValue = LI; - else - SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand(); - - CurAST->copyValue(SomeValue, PreheaderLoad); - } - // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); - - // If the preheader load is itself a pointer, we need to tell alias analysis - // about the new pointer we created in the preheader block and about any PHI - // nodes that just got inserted. - if (PreheaderLoad->getType()->isPointerTy()) { - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - CurAST->copyValue(PreheaderLoad, NewPHIs[i]); - } - - // fwew, we're done! + + // If the SSAUpdater didn't use the load in the preheader, just zap it now. + if (PreheaderLoad->use_empty()) + PreheaderLoad->eraseFromParent(); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index f8ce214..1366231 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -81,7 +81,7 @@ namespace { bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *SplatValue, Instruction *TheStore, @@ -91,7 +91,7 @@ namespace { const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, const SCEV *BECount); - + /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. /// @@ -134,50 +134,50 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector<Instruction*, 32> NowDeadInsts; - + NowDeadInsts.push_back(I); - + // Before we touch this instruction, remove it from SE! do { Instruction *DeadInst = NowDeadInsts.pop_back_val(); - + // This instruction is dead, zap it, in stages. Start by removing it from // SCEV. SE.forgetValue(DeadInst); - + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); DeadInst->setOperand(op, 0); - + // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; - + if (Instruction *OpI = dyn_cast<Instruction>(Op)) if (isInstructionTriviallyDead(OpI)) NowDeadInsts.push_back(OpI); } - + DeadInst->eraseFromParent(); - + } while (!NowDeadInsts.empty()); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - + // The trip count of the loop must be analyzable. SE = &getAnalysis<ScalarEvolution>(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) return false; const SCEV *BECount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECount)) return false; - + // If this loop executes exactly one time, then it should be peeled, not // optimized by this pass. if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) if (BECst->getValue()->getValue() == 0) return false; - + // We require target data for now. 
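// Illustrative sketch, not part of this patch: the worklist discipline
// DeleteDeadInstruction above uses, with a hypothetical Node type.
// Deleting an instruction may render its operands trivially dead, so
// operands are pushed onto a worklist instead of recursing, which bounds
// stack depth on long dependence chains.
#include <vector>
struct NodeSketch { std::vector<NodeSketch*> Operands; unsigned Uses = 0; };
static void deleteDeadSketch(NodeSketch *Root) {
  std::vector<NodeSketch*> Worklist(1, Root);
  while (!Worklist.empty()) {
    NodeSketch *Dead = Worklist.back();
    Worklist.pop_back();
    for (NodeSketch *Op : Dead->Operands)
      if (--Op->Uses == 0)     // operand just became dead: queue it
        Worklist.push_back(Op);
    Dead->Operands.clear();    // detach operands, then erase Dead itself
  }
}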
TD = getAnalysisIfAvailable<TargetData>(); if (TD == 0) return false; @@ -185,14 +185,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis<DominatorTree>(); LoopInfo &LI = getAnalysis<LoopInfo>(); TLI = &getAnalysis<TargetLibraryInfo>(); - + SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); DEBUG(dbgs() << "loop-idiom Scanning: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - + bool MadeChange = false; // Scan all the blocks in the loop that are not in subloops. for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; @@ -200,7 +200,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { // Ignore blocks in subloops. if (LI.getLoopFor(*BI) != CurLoop) continue; - + MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); } return MadeChange; @@ -217,7 +217,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) if (!DT->dominates(BB, ExitBlocks[i])) return false; - + bool MadeChange = false; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; @@ -226,20 +226,20 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, WeakVH InstPtr(I); if (!processLoopStore(SI, BECount)) continue; MadeChange = true; - + // If processing the store invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) I = BB->begin(); continue; } - + // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { WeakVH InstPtr(I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; - + // If processing the memset invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) @@ -247,7 +247,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, continue; } } - + return MadeChange; } @@ -258,12 +258,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); - + // Reject stores that are so large that they overflow an unsigned. uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) return false; - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -274,9 +274,9 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. - unsigned StoreSize = (unsigned)SizeInBits >> 3; + unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); - + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. 
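// Worked instance, not part of this patch, of the stride screen above: a
// loop storing i32 through {base,+,4} advances exactly one store width per
// iteration, so every byte of the range is written and a memset is legal;
// {base,+,8} would leave unwritten gaps and is rejected.
#include <cstdint>
static bool strideCoversEveryByte(uint64_t SizeInBits, int64_t Stride) {
  unsigned StoreSize = unsigned(SizeInBits >> 3); // bits to bytes
  return Stride > 0 && uint64_t(Stride) == StoreSize;
}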
@@ -285,7 +285,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; dbgs() << "BB: " << *SI->getParent(); } - + return false; } @@ -319,9 +319,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // If we're not allowed to hack on memset, we fail. if (!TLI->has(LibFunc::memset)) return false; - + Value *Pointer = MSI->getDest(); - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -333,16 +333,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); if ((SizeInBytes >> 32) != 0) return false; - + // Check to see if the stride matches the size of the memset. If so, then we // know that every byte is touched in the loop. const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); - + // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. if (Stride == 0 || MSI->getLength() != Stride->getValue()) return false; - + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, MSI->getAlignment(), MSI->getValue(), MSI, Ev, BECount); @@ -365,7 +365,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, // to be exactly the size of the memset, which is (BECount+1)*StoreSize if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; - + // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias // with store of &A[100], we need to StoreLoc to be "A" with size of 100, @@ -394,12 +394,12 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // that doesn't seem worthwhile. Constant *C = dyn_cast<Constant>(V); if (C == 0) return 0; - + // Only handle simple values that are a power of two bytes in size. uint64_t Size = TD.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) return 0; - + // Don't care enough about darwin/ppc to implement this. if (TD.isBigEndian()) return 0; @@ -410,7 +410,7 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). if (Size > 16) return 0; - + // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -428,14 +428,14 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount) { - + // If the stored value is a byte-wise value (like i32 -1), then it may be // turned into a memset of i8 -1, assuming that all the consecutive bytes // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = 0; - + // If we're allowed to form a memset, and the stored value would be acceptable // for memset, use it. 
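// Worked restatement, not part of this patch, of the screens in
// getMemSetPatternValue above: the constant must occupy a whole number of
// bytes, be a power of two in size, and fit the 16-byte pattern that
// memset_pattern16 consumes.
#include <cstdint>
static bool isPatternSizeOK(uint64_t SizeInBits) {
  if (SizeInBits == 0 || (SizeInBits & 7))
    return false;                        // whole bytes only
  if (SizeInBits & (SizeInBits - 1))
    return false;                        // power of two
  return SizeInBits / 8 <= 16;           // at most the 16-byte pattern
}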
if (SplatValue && TLI->has(LibFunc::memset) && @@ -453,8 +453,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // do anything with a 3-byte store, for example. return false; } - - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -463,47 +463,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) return false; - + // Okay, everything looks good, insert the memset. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - + unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); - Value *BasePtr = + Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - - Value *NumBytes = + SCEV::FlagNUW); + + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - - Value *NewCall; + + CallInst *NewCall; if (SplatValue) NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); else { Module *M = TheStore->getParent()->getParent()->getParent(); Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), - Builder.getInt8PtrTy(), + Builder.getInt8PtrTy(), Builder.getInt8PtrTy(), IntPtr, (void*)0); - + // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, @@ -514,11 +514,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); } - + DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" << " from store to: " << *Ev << " at: " << *TheStore << "\n"); - (void)NewCall; - + NewCall->setDebugLoc(TheStore->getDebugLoc()); + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(TheStore, *SE); @@ -536,9 +536,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // If we're not allowed to form memcpy, we fail. if (!TLI->has(LibFunc::memcpy)) return false; - + LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); - + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. 
However, this // would be unsafe to do if there is anything else in the loop that may read @@ -555,49 +555,49 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), SI)) return false; - + // Okay, everything looks good, insert the memcpy. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - Value *LoadBasePtr = + Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); - Value *StoreBasePtr = + Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(SI->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - + SCEV::FlagNUW); + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - + Value *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, std::min(SI->getAlignment(), LI->getAlignment())); - + DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); (void)NewCall; - + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(SI, *SE); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 95e1578..47dced3 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -184,7 +184,11 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); BasicBlock *OrigLatch = L->getLoopLatch(); - assert(OrigPreheader && OrigLatch && "Loop not in canonical form?"); + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (OrigPreheader == 0 || OrigLatch == 0) + return false; // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. @@ -322,7 +326,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // We can fold the conditional branch in the preheader, this makes things // simpler. The first step is to remove the extra edge to the Exit block. Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst::Create(NewHeader, PHBI); + BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); + NewBI->setDebugLoc(PHBI->getDebugLoc()); PHBI->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. 
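// Illustrative recap, not part of this patch, of two details in the memcpy
// formation above: the byte count is (BECount+1)*StoreSize, tagged NUW
// because a valid trip count cannot overflow it, and the call takes the
// weaker of the two alignments since it must hold for both sides.
#include <algorithm>
#include <cstdint>
static uint64_t memcpyBytes(uint64_t BECount, uint64_t StoreSize) {
  return (BECount + 1) * StoreSize;      // expanded via SCEV in the preheader
}
static unsigned memcpyAlign(unsigned StoreAlign, unsigned LoadAlign) {
  return std::min(StoreAlign, LoadAlign); // conservative shared alignment
}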
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ac4aea2..5abc790 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -253,7 +253,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L, DoInitialMatch(AR->getStart(), L, Good, Bad, SE); DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + // FIXME: AR->getNoWrapFlags() + AR->getLoop(), SCEV::FlagAnyWrap), L, Good, Bad, SE); return; } @@ -455,7 +456,10 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, IgnoreSignificantBits); if (!Start) return 0; - return SE.getAddRecExpr(Start, Step, AR->getLoop()); + // FlagNW is independent of the start value, step direction, and is + // preserved with smaller magnitude steps. + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } return 0; } @@ -520,7 +524,9 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); if (Result != 0) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -545,7 +551,9 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); if (Result) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -564,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::prefetch: - case Intrinsic::x86_sse2_loadu_dq: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse_loadu_ps: case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: @@ -781,7 +786,7 @@ void Cost::RateFormula(const Formula &F, } } -/// Loose - Set this cost to a loosing value. +/// Loose - Set this cost to a losing value. void Cost::Loose() { NumRegs = ~0u; AddRecCost = ~0u; @@ -1483,7 +1488,7 @@ void LSRInstance::OptimizeShadowIV() { if (!C->getValue().isStrictlyPositive()) continue; /* Add new PHINode. */ - PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); /* create new increment. '++d' in above example. */ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); @@ -1819,7 +1824,7 @@ LSRInstance::OptimizeLoopTermCond() { } } -/// reconcileNewOffset - Determine if the given use can accomodate a fixup +/// reconcileNewOffset - Determine if the given use can accommodate a fixup /// at the given offset and other details. If so, update the use and /// return true. 
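// Illustrative sketch, not part of this patch: the algebra behind the
// DoInitialMatch split above. {Start,+,Step} is decomposed into the
// loop-invariant Start plus the zero-based recurrence {0,+,Step}, so each
// half can be matched against addressing modes independently.
#include <cstdint>
static int64_t addRecAtIteration(int64_t Start, int64_t Step, uint64_t It) {
  int64_t ZeroBased = Step * int64_t(It); // value of {0,+,Step} at It
  return Start + ZeroBased;               // equals {Start,+,Step} at It
}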
bool @@ -2236,7 +2241,9 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + AR->getLoop(), + //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap), C, Ops, L, SE); CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; @@ -3047,7 +3054,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { } } -/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call /// FilterOutUndesirableDedicatedRegisters again, if necessary, now that /// we've done more filtering, as it may be able to find more formulae to /// eliminate. diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 80b263a..fef6bc3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -43,7 +43,13 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(ID) { + LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) { + CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); + CurrentCount = (C == -1) ? UnrollCount : unsigned(C); + CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; + + UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -56,7 +62,10 @@ namespace { // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; + unsigned CurrentCount; unsigned CurrentThreshold; + bool CurrentAllowPartial; + bool UserThreshold; // CurrentThreshold is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -87,7 +96,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { + return new LoopUnroll(Threshold, Count, AllowPartial); +} /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { @@ -119,14 +130,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. - CurrentThreshold = UnrollThreshold; - if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) && - UnrollThreshold.getNumOccurrences() == 0) - CurrentThreshold = OptSizeUnrollThreshold; + unsigned Threshold = CurrentThreshold; + if (!UserThreshold && + Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Threshold = OptSizeUnrollThreshold; // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = UnrollCount; + unsigned Count = CurrentCount; // Automatically select an unroll count. if (Count == 0) { @@ -140,7 +151,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Enforce the threshold. 
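// Worked instance, not part of this patch, of the partial-unroll fallback
// in the code below: with Threshold = 100, LoopSize = 30, TripCount = 12,
// the size budget allows Count = 100/30 = 3, and 12 % 3 == 0 already
// holds, so the loop is partially unrolled by 3. If TripCount were 10,
// Count would walk down 3 -> 2, the largest divisor within budget.
static unsigned partialUnrollCountSketch(unsigned Threshold,
                                         unsigned LoopSize,
                                         unsigned TripCount) {
  unsigned Count = Threshold / LoopSize;
  while (Count != 0 && TripCount % Count != 0)
    --Count;               // step down to a divisor of the trip count
  return Count;
}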
- if (CurrentThreshold != NoThreshold) { + if (Threshold != NoThreshold) { unsigned NumInlineCandidates; unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); @@ -149,16 +160,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > CurrentThreshold) { + if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << CurrentThreshold << "\n"); - if (!UnrollAllowPartial) { + << " because size: " << Size << ">" << Threshold << "\n"); + if (!CurrentAllowPartial) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = CurrentThreshold / LoopSize; + Count = Threshold / LoopSize; while (Count != 0 && TripCount%Count != 0) { Count--; } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index bde0e53..a3035cb 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <list> using namespace llvm; @@ -299,12 +300,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, namespace { class MemCpyOpt : public FunctionPass { MemoryDependenceAnalysis *MD; + TargetLibraryInfo *TLI; const TargetData *TD; public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); MD = 0; + TLI = 0; + TD = 0; } bool runOnFunction(Function &F); @@ -316,6 +320,7 @@ namespace { AU.addRequired<DominatorTree>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); } @@ -346,6 +351,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) @@ -688,7 +694,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, if (M->getSource() == MDep->getSource()) return false; - // Second, the length of the memcpy's must be the same, or the preceeding one + // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); @@ -804,6 +810,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { bool MemCpyOpt::processMemMove(MemMoveInst *M) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + if (!TLI->has(LibFunc::memmove)) + return false; + // See if the pointers alias. 
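// Illustrative recap, not part of this patch: why the no-alias query below
// matters. memmove must tolerate overlapping ranges, while memcpy may
// assume disjoint ones, so the strength reduction is only sound once alias
// analysis proves the operands cannot overlap.
#include <cstring>
static void moveOrCopySketch(void *Dst, const void *Src, std::size_t N,
                             bool ProvenNoAlias) {
  if (ProvenNoAlias)
    std::memcpy(Dst, Src, N);  // cheaper: ranges known disjoint
  else
    std::memmove(Dst, Src, N); // must handle overlap
}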
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) return false; @@ -935,6 +944,14 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis<MemoryDependenceAnalysis>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + + // If we don't have at least memset and memcpy, there is little point of doing + // anything here. These are required by a freestanding implementation, so if + // even they are disabled, there is no point in trying hard. + if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) + return false; + while (1) { if (!iterateOnFunction(F)) break; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index e093b52..c1dfe15 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -74,6 +75,8 @@ namespace { class Reassociate : public FunctionPass { DenseMap<BasicBlock*, unsigned> RankMap; DenseMap<AssertingVH<>, unsigned> ValueRankMap; + SmallVector<WeakVH, 8> RedoInsts; + SmallVector<WeakVH, 8> DeadInsts; bool MadeChange; public: static char ID; // Pass identification, replacement for typeid @@ -98,7 +101,7 @@ namespace { void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); void LinearizeExpr(BinaryOperator *I); Value *RemoveFactorFromExpression(Value *V, Value *Factor); - void ReassociateBB(BasicBlock *BB); + void ReassociateInst(BasicBlock::iterator &BBI); void RemoveDeadBinaryOp(Value *V); }; @@ -113,13 +116,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } void Reassociate::RemoveDeadBinaryOp(Value *V) { Instruction *Op = dyn_cast<Instruction>(V); - if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty()) + if (!Op || !isa<BinaryOperator>(Op)) return; Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); ValueRankMap.erase(Op); - Op->eraseFromParent(); + DeadInsts.push_back(Op); RemoveDeadBinaryOp(LHS); RemoveDeadBinaryOp(RHS); } @@ -214,6 +217,7 @@ static Instruction *LowerNegateToMultiply(Instruction *Neg, ValueRankMap.erase(Neg); Res->takeName(Neg); Neg->replaceAllUsesWith(Res); + Res->setDebugLoc(Neg->getDebugLoc()); Neg->eraseFromParent(); return Res; } @@ -503,6 +507,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, // Everyone now refers to the add instruction. ValueRankMap.erase(Sub); Sub->replaceAllUsesWith(New); + New->setDebugLoc(Sub->getDebugLoc()); Sub->eraseFromParent(); DEBUG(dbgs() << "Negated: " << *New << '\n'); @@ -528,6 +533,7 @@ static Instruction *ConvertShiftToMul(Instruction *Shl, ValueRankMap.erase(Shl); Mul->takeName(Shl); Shl->replaceAllUsesWith(Mul); + Mul->setDebugLoc(Shl->getDebugLoc()); Shl->eraseFromParent(); return Mul; } @@ -603,7 +609,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { // remaining operand. if (Factors.size() == 1) { ValueRankMap.erase(BO); - BO->eraseFromParent(); + DeadInsts.push_back(BO); V = Factors[0].Op; } else { RewriteExprTree(BO, Factors); @@ -732,7 +738,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Now that we have inserted a multiply, optimize it. 
This allows us to // handle cases that require multiple factoring steps, such as this: // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 - Mul = ReassociateExpression(cast<BinaryOperator>(Mul)); + RedoInsts.push_back(Mul); // If every add operand was a duplicate, return the multiply. if (Ops.empty()) @@ -960,71 +966,69 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, } -/// ReassociateBB - Inspect all of the instructions in this basic block, -/// reassociating them as we go. -void Reassociate::ReassociateBB(BasicBlock *BB) { - for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { - Instruction *BI = BBI++; - if (BI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { - MadeChange = true; - BI = NI; - } +/// ReassociateInst - Inspect and reassociate the instruction at the +/// given position, post-incrementing the position. +void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { + Instruction *BI = BBI++; + if (BI->getOpcode() == Instruction::Shl && + isa<ConstantInt>(BI->getOperand(1))) + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { + MadeChange = true; + BI = NI; + } - // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || - BI->getType()->isVectorTy()) - continue; // Floating point ops are not associative. - - // Do not reassociate boolean (i1) expressions. We want to preserve the - // original order of evaluation for short-circuited comparisons that - // SimplifyCFG has folded to AND/OR expressions. If the expression - // is not further optimized, it is likely to be transformed back to a - // short-circuited form for code gen, and the source order may have been - // optimized for the most likely conditions. - if (BI->getType()->isIntegerTy(1)) - continue; + // Reject cases where it is pointless to do this. + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || + BI->getType()->isVectorTy()) + return; // Floating point ops are not associative. + + // Do not reassociate boolean (i1) expressions. We want to preserve the + // original order of evaluation for short-circuited comparisons that + // SimplifyCFG has folded to AND/OR expressions. If the expression + // is not further optimized, it is likely to be transformed back to a + // short-circuited form for code gen, and the source order may have been + // optimized for the most likely conditions. + if (BI->getType()->isIntegerTy(1)) + return; - // If this is a subtract instruction which is not already in negate form, - // see if we can convert it to X+-Y. - if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(BI)) { - BI = BreakUpSubtract(BI, ValueRankMap); - // Reset the BBI iterator in case BreakUpSubtract changed the - // instruction it points to. - BBI = BI; - ++BBI; + // If this is a subtract instruction which is not already in negate form, + // see if we can convert it to X+-Y. + if (BI->getOpcode() == Instruction::Sub) { + if (ShouldBreakUpSubtract(BI)) { + BI = BreakUpSubtract(BI, ValueRankMap); + // Reset the BBI iterator in case BreakUpSubtract changed the + // instruction it points to. + BBI = BI; + ++BBI; + MadeChange = true; + } else if (BinaryOperator::isNeg(BI)) { + // Otherwise, this is a negation. See if the operand is a multiply tree + // and if this is not an inner node of a multiply tree. 
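// Illustrative recap, not part of this patch, of the check below:
// LowerNegateToMultiply rewrites a negation as a multiply by -1 so it can
// join the surrounding multiply tree and be reassociated with the other
// factors.
static int lowerNegateSketch(int X) {
  return X * -1;             // algebraically identical to 0 - X
}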
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && + (!BI->hasOneUse() || + !isReassociableOp(BI->use_back(), Instruction::Mul))) { + BI = LowerNegateToMultiply(BI, ValueRankMap); MadeChange = true; - } else if (BinaryOperator::isNeg(BI)) { - // Otherwise, this is a negation. See if the operand is a multiply tree - // and if this is not an inner node of a multiply tree. - if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && - (!BI->hasOneUse() || - !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap); - MadeChange = true; - } } } + } - // If this instruction is a commutative binary operator, process it. - if (!BI->isAssociative()) continue; - BinaryOperator *I = cast<BinaryOperator>(BI); + // If this instruction is a commutative binary operator, process it. + if (!BI->isAssociative()) return; + BinaryOperator *I = cast<BinaryOperator>(BI); - // If this is an interior node of a reassociable tree, ignore it until we - // get to the root of the tree, to avoid N^2 analysis. - if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) - continue; + // If this is an interior node of a reassociable tree, ignore it until we + // get to the root of the tree, to avoid N^2 analysis. + if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) + return; - // If this is an add tree that is used by a sub instruction, ignore it - // until we process the subtract. - if (I->hasOneUse() && I->getOpcode() == Instruction::Add && - cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) - continue; + // If this is an add tree that is used by a sub instruction, ignore it + // until we process the subtract. + if (I->hasOneUse() && I->getOpcode() == Instruction::Add && + cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) + return; - ReassociateExpression(I); - } + ReassociateExpression(I); } Value *Reassociate::ReassociateExpression(BinaryOperator *I) { @@ -1051,6 +1055,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // eliminate it. DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); + if (Instruction *VI = dyn_cast<Instruction>(V)) + VI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); ++NumAnnihil; return V; @@ -1074,6 +1080,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // This expression tree simplified to something that isn't a tree, // eliminate it. I->replaceAllUsesWith(Ops[0].Op); + if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) + OI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); return Ops[0].Op; } @@ -1091,7 +1099,21 @@ bool Reassociate::runOnFunction(Function &F) { MadeChange = false; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - ReassociateBB(FI); + for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) + ReassociateInst(BBI); + + // Now that we're done, revisit any instructions which are likely to + // have secondary reassociation opportunities. + while (!RedoInsts.empty()) + if (Value *V = RedoInsts.pop_back_val()) { + BasicBlock::iterator BBI = cast<Instruction>(V); + ReassociateInst(BBI); + } + + // Now that we're done, delete any instructions which are no longer used. + while (!DeadInsts.empty()) + if (Value *V = DeadInsts.pop_back_val()) + RecursivelyDeleteTriviallyDeadInstructions(V); // We are done with the rank map. 
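// Illustrative sketch, not part of this patch: the weak-handle drain used
// by the RedoInsts/DeadInsts loops above, in plain C++ via weak_ptr as a
// stand-in for WeakVH. A handle can go null when its instruction was
// already deleted by an earlier step, so each is re-checked before use.
#include <memory>
#include <vector>
static void drainSketch(std::vector<std::weak_ptr<int> > &Work) {
  while (!Work.empty()) {
    std::weak_ptr<int> H = Work.back();
    Work.pop_back();
    if (std::shared_ptr<int> Live = H.lock())
      (void)Live;            // still alive: revisit or delete it here
  }
}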
RankMap.clear(); diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 459bb06..47afc77 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -9,7 +9,7 @@ // // This file demotes all registers to memory references. It is intented to be // the inverse of PromoteMemoryToRegister. By converting to loads, the only -// values live accross basic blocks are allocas and loads before phi nodes. +// values live across basic blocks are allocas and loads before phi nodes. // It is intended that this should make CFG hacking much easier. // To make later hacking easier, the entry block is split into two, such that // all introduced allocas and nothing else are in the entry block. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index c82e929..db8eb85 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1989,7 +1989,7 @@ bool IPSCCP::runOnModule(Module &M) { ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); } - // If we infered constant or undef values for globals variables, we can delete + // If we inferred constant or undef values for globals variables, we can delete // the global and any stores that remain to it. const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals(); for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(), diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index bf9ca6d..32a0506 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -17,6 +17,7 @@ #include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" @@ -34,7 +35,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeDCEPass(Registry); initializeDeadInstEliminationPass(Registry); initializeDSEPass(Registry); - initializeGEPSplitterPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); initializeIndVarSimplifyPass(Registry); @@ -56,7 +56,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSROA_DTPass(Registry); initializeSROA_SSAUpPass(Registry); initializeCFGSimplifyPassPass(Registry); - initializeSimplifyHalfPowrLibCallsPass(Registry); initializeSimplifyLibCallsPass(Registry); initializeSinkingPass(Registry); initializeTailDupPass(Registry); @@ -103,6 +102,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopDeletionPass()); } +void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopIdiomPass()); +} + void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopRotatePass()); } @@ -135,6 +138,10 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createScalarReplAggregatesPass()); } +void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarReplAggregatesPass(-1, false)); +} + void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, int Threshold) { unwrap(PM)->add(createScalarReplAggregatesPass(Threshold)); @@ -159,3 +166,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); } + +void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { + 
unwrap(PM)->add(createCorrelatedValuePropagationPass()); +} + +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createEarlyCSEPass()); +} + +void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTypeBasedAliasAnalysisPass()); +} + +void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createBasicAliasAnalysisPass()); +} diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c3ca852..8178c27 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -219,7 +219,7 @@ namespace { /// optimization, which scans the uses of an alloca and determines if it can /// rewrite it in terms of a single new alloca that can be mem2reg'd. class ConvertToScalarInfo { - /// AllocaSize - The size of the alloca being considered. + /// AllocaSize - The size of the alloca being considered in bytes. unsigned AllocaSize; const TargetData &TD; @@ -238,19 +238,22 @@ class ConvertToScalarInfo { /// also declared as a vector, we do want to promote to a vector. bool HadAVector; + /// HadNonMemTransferAccess - True if there is at least one access to the + /// alloca that is not a MemTransferInst. We don't want to turn structs into + /// large integers unless there is some potential for optimization. + bool HadNonMemTransferAccess; + public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td) { - IsNotTrivial = false; - VectorTy = 0; - HadAVector = false; - } + : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), + HadAVector(false), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset); + void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore); + bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, @@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { + unsigned BitWidth = AllocaSize * 8; + if (!HadAVector && !HadNonMemTransferAccess && + !TD.fitsInLegalInteger(BitWidth)) + return 0; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + NewTy = IntegerType::get(AI->getContext(), BitWidth); } AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); @@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) /// so far at the offset specified by Offset (which is specified in bytes). /// -/// There are two cases we handle here: +/// There are three cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. /// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. -/// 2) A fully general blob of memory, which we turn into some (potentially +/// 2) A union of vector types with power-of-2 size differences, e.g. 
a float, +/// <2 x float> and <4 x float>. Here we turn element accesses into insert +/// and extract element operations, and <2 x float> accesses into a cast to +/// <2 x double>, an extract, and a cast back to <2 x float>. +/// 3) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { +void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, + bool IsLoadOrStore) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. if (VectorTy && VectorTy->isVoidTy()) @@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { - // Remember if we saw a vector type. - HadAVector = true; - - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. If this is a subsequent access, ignore it - // even if it is a differing type but the same size. Worst case we can - // bitcast the resultant vectors. - if (VectorTy == 0) - VectorTy = VInTy; + if (MergeInVectorType(VInTy, Offset)) return; - } } else if (In->isFloatTy() || In->isDoubleTy() || (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // Full width accesses can be ignored, because they can always be turned + // into bitcasts. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (IsLoadOrStore && EltSize == AllocaSize) + return; + // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (VectorTy == 0 || - cast<VectorType>(VectorTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VectorTy == 0) + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) { + if (!VectorTy) { VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; + return; + } + + unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType() + ->getPrimitiveSizeInBits()/8; + if (EltSize == CurrentEltSize) + return; + + if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) + return; } } @@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { VectorTy = Type::getVoidTy(In->getContext()); } +/// MergeInVectorType - Handles the vector case of MergeInType, returning true +/// if the type was successfully merged and false otherwise. +bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, + uint64_t Offset) { + // Remember if we saw a vector type. + HadAVector = true; + + // TODO: Support nonzero offsets? + if (Offset != 0) + return false; + + // Only allow vectors that are a power-of-2 away from the size of the alloca. + if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) + return false; + + // If this the first vector we see, remember the type so that we know the + // element size. 
+ if (!VectorTy) { + VectorTy = VInTy; + return true; + } + + unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); + unsigned InBitWidth = VInTy->getBitWidth(); + + // Vectors of the same size can be converted using a simple bitcast. + if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) + return true; + + const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType(); + const Type *InElementTy = cast<VectorType>(VInTy)->getElementType(); + + // Do not allow mixed integer and floating-point accesses from vectors of + // different sizes. + if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) + return false; + + if (ElementTy->isFloatingPointTy()) { + // Only allow floating-point vectors of different sizes if they have the + // same element type. + // TODO: This could be loosened a bit, but would anything benefit? + if (ElementTy != InElementTy) + return false; + + // There are no arbitrary-precision floating-point types, which limits the + // number of legal vector types with larger element types that we can form + // to bitcast and extract a subvector. + // TODO: We could support some more cases with mixed fp128 and double here. + if (!(BitWidth == 64 || BitWidth == 128) || + !(InBitWidth == 64 || InBitWidth == 128)) + return false; + } else { + assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " + "or floating-point."); + unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); + unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); + + // Do not allow integer types smaller than a byte or types whose widths are + // not a multiple of a byte. + if (BitWidth < 8 || InBitWidth < 8 || + BitWidth % 8 != 0 || InBitWidth % 8 != 0) + return false; + } + + // Pick the largest of the two vector types. + if (InBitWidth > BitWidth) + VectorTy = VInTy; + + return true; +} + /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all /// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable @@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) return false; - MergeInType(LI->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(LI->getType(), Offset, true); continue; } @@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; - MergeInType(SI->getOperand(0)->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(SI->getOperand(0)->getType(), Offset, true); continue; } @@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { !isa<ConstantInt>(MSI->getLength())) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, } } +/// getScaledElementType - Gets a scaled element type for a partial vector +/// access of an alloca. 
The input types must be integer or floating-point +/// scalar or vector types, and the resulting type is an integer, float or +/// double. +static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, + unsigned NewBitWidth) { + bool IsFP1 = Ty1->isFloatingPointTy() || + (Ty1->isVectorTy() && + cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); + bool IsFP2 = Ty2->isFloatingPointTy() || + (Ty2->isVectorTy() && + cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); + + LLVMContext &Context = Ty1->getContext(); + + // Prefer floating-point types over integer types, as integer types may have + // been created by earlier scalar replacement. + if (IsFP1 || IsFP2) { + if (NewBitWidth == 32) + return Type::getFloatTy(Context); + if (NewBitWidth == 64) + return Type::getDoubleTy(Context); + } + + return Type::getIntNTy(Context, NewBitWidth); +} + +/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector +/// to another vector of the same element type which has the same allocation +/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). +static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, + IRBuilder<> &Builder) { + const Type *FromType = FromVal->getType(); + const VectorType *FromVTy = cast<VectorType>(FromType); + const VectorType *ToVTy = cast<VectorType>(ToType); + assert((ToVTy->getElementType() == FromVTy->getElementType()) && + "Vectors must have the same element type"); + Value *UnV = UndefValue::get(FromType); + unsigned numEltsFrom = FromVTy->getNumElements(); + unsigned numEltsTo = ToVTy->getNumElements(); + + SmallVector<Constant*, 3> Args; + const Type* Int32Ty = Builder.getInt32Ty(); + unsigned minNumElts = std::min(numEltsFrom, numEltsTo); + unsigned i; + for (i=0; i != minNumElts; ++i) + Args.push_back(ConstantInt::get(Int32Ty, i)); + + if (i < numEltsTo) { + Constant* UnC = UndefValue::get(Int32Ty); + for (; i != numEltsTo; ++i) + Args.push_back(UnC); + } + Constant *Mask = ConstantVector::get(Args); + return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); +} + /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer /// or vector value FromVal, extracting the bits from the offset specified by /// Offset. This returns the value, which is of type ToType. @@ -589,14 +734,46 @@ Value *ConvertToScalarInfo:: ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - if (FromVal->getType() == ToType && Offset == 0) + const Type *FromType = FromVal->getType(); + if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); + if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + unsigned ToTypeSize = TD.getTypeAllocSize(ToType); + if (ToTypeSize == AllocaSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. 
+ if (FromType->getPrimitiveSizeInBits() == + ToType->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + else + return CreateShuffleVectorCast(FromVal, ToType, Builder); + } + + if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " + "of a smaller vector type at a nonzero offset."); + + const Type *CastElementTy = getScaledElementType(FromType, ToType, + ToTypeSize * 8); + unsigned NumCastVectorElements = AllocaSize / ToTypeSize; + + LLVMContext &Context = FromVal->getContext(); + const Type *CastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Extract, ToType, "tmp"); + } // Otherwise it must be an element access. unsigned Elt = 0; @@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); + if (ValSize == VecSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (VTy->getPrimitiveSizeInBits() == + SV->getType()->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + else + return CreateShuffleVectorCast(SV, VTy, Builder); + } - uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + if (isPowerOf2_64(VecSize / ValSize)) { + assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " + "value of a smaller vector type at a nonzero offset."); - // Must be an element insertion. - unsigned Elt = Offset/EltSize; + const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), + ValSize); + unsigned NumCastVectorElements = VecSize / ValSize; - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + LLVMContext &Context = SV->getContext(); + const Type *OldCastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); - SV = Builder.CreateInsertElement(Old, SV, + Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Insert = + Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Insert, AllocaType, "tmp"); + } + + // Must be an element insertion. + assert(SV->getType() == VTy->getElementType()); + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + unsigned Elt = Offset/EltSize; + return Builder.CreateInsertElement(Old, SV, ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); - return SV; } // If SV is a first-class aggregate value, insert each value recursively. 
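The partial-vector machinery added above (MergeInVectorType plus the new branches in ConvertScalar_ExtractValue and ConvertScalar_InsertValue) boils down to two steps: admit only accesses whose byte size divides the alloca size by a power of two, then realize each such access as a bitcast to a vector of access-sized elements followed by an extractelement or insertelement. A hedged, self-contained sketch of both steps follows; the names are illustrative, and the real code derives the element type via getScaledElementType rather than assuming float.

```cpp
// Sketch under assumptions: elements are 32-bit floats and Off is
// element-aligned. Mirrors the "bitcast, extract, bitcast" idiom above.
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/DataTypes.h"

using namespace llvm;

// Admission rule: alloca size must be a power-of-2 multiple of the access.
static bool mergeableAccess(uint64_t AllocaBytes, uint64_t AccessBytes) {
  if (AccessBytes == 0 || AllocaBytes % AccessBytes != 0)
    return false;
  uint64_t Ratio = AllocaBytes / AccessBytes;
  return (Ratio & (Ratio - 1)) == 0;   // e.g. 16/8 is fine, 16/12 is not
}

// Read one float at byte offset Off out of a vector-typed alloca value.
static Value *extractFloatAt(IRBuilder<> &B, Value *FromVal, uint64_t Off,
                             uint64_t AllocaBytes) {
  LLVMContext &Ctx = B.GetInsertBlock()->getContext();
  // View the whole alloca as <AllocaBytes/4 x float>, then pull one lane.
  const Type *CastTy = VectorType::get(Type::getFloatTy(Ctx),
                                       AllocaBytes / 4);
  Value *Cast = B.CreateBitCast(FromVal, CastTy, "tmp");
  Value *Idx = ConstantInt::get(Type::getInt32Ty(Ctx), Off / 4);
  return B.CreateExtractElement(Cast, Idx, "tmp");
}
```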
@@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { } const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); - PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), + PN->getName()+".ld", PN); // Get the TBAA tag and alignment to use from one of the loads. It doesn't // matter which one we get and if any differ, it doesn't matter. diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ce5dd73..1137c2b 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -73,7 +73,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { if (UseLLVMTrap) { Function *TrapFn = Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst::Create(TrapFn, "", I); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); } new UnreachableInst(I->getContext(), I); @@ -259,11 +260,12 @@ static bool MergeEmptyReturnBlocks(Function &F) { PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); if (RetBlockPHI == 0) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); - RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge", + pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); + RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), + std::distance(PB, PE), "merge", &RetBlock->front()); - for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock); - PI != E; ++PI) + for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp deleted file mode 100644 index 70ff32e..0000000 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a simple pass that applies an experimental -// transformation on calls to specific functions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "simplify-libcalls-halfpowr" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Target/TargetData.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -using namespace llvm; - -namespace { - /// This pass optimizes well half_powr function calls. 
- /// - class SimplifyHalfPowrLibCalls : public FunctionPass { - const TargetData *TD; - public: - static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(ID) { - initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - } - - Instruction * - InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt); - }; - char SimplifyHalfPowrLibCalls::ID = 0; -} // end anonymous namespace. - -INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", - "Simplify half_powr library calls", false, false) - -// Public interface to the Simplify HalfPowr LibCalls pass. -FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { - return new SimplifyHalfPowrLibCalls(); -} - -/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging -/// their control flow to better facilitate subsequent optimization. -Instruction * -SimplifyHalfPowrLibCalls:: -InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt) { - std::vector<BasicBlock *> Bodies; - BasicBlock *NewBlock = 0; - - for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) { - CallInst *Call = cast<CallInst>(HalfPowrs[i]); - Function *Callee = Call->getCalledFunction(); - - // Minimally sanity-check the CFG of half_powr to ensure that it contains - // the kind of code we expect. If we're running this pass, we have - // reason to believe it will be what we expect. - Function::iterator I = Callee->begin(); - BasicBlock *Prologue = I++; - if (I == Callee->end()) break; - BasicBlock *SubnormalHandling = I++; - if (I == Callee->end()) break; - BasicBlock *Body = I++; - if (I != Callee->end()) break; - if (SubnormalHandling->getSinglePredecessor() != Prologue) - break; - BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator()); - if (!PBI || !PBI->isConditional()) - break; - BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator()); - if (!SNBI || SNBI->isConditional()) - break; - if (!isa<ReturnInst>(Body->getTerminator())) - break; - - Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); - - // Inline the call, taking care of what code ends up where. - NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); - - InlineFunctionInfo IFI(0, TD); - bool B = InlineFunction(Call, IFI); - assert(B && "half_powr didn't inline?"); - (void)B; - - BasicBlock *NewBody = NewBlock->getSinglePredecessor(); - assert(NewBody); - Bodies.push_back(NewBody); - } - - if (!NewBlock) - return InsertPt; - - // Put the code for all the bodies into one block, to facilitate - // subsequent optimization. - (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this); - for (unsigned i = 0, e = Bodies.size(); i != e; ++i) { - BasicBlock *Body = Bodies[i]; - Instruction *FNP = Body->getFirstNonPHI(); - // Splice the insts from body into NewBlock. - NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(), - FNP, Body->getTerminator()); - } - - return NewBlock->begin(); -} - -/// runOnFunction - Top level algorithm. -/// -bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { - TD = getAnalysisIfAvailable<TargetData>(); - - bool Changed = false; - std::vector<Instruction *> HalfPowrs; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // Look for calls. 
- bool IsHalfPowr = false; - if (CallInst *CI = dyn_cast<CallInst>(I)) { - // Look for direct calls and calls to non-external functions. - Function *Callee = CI->getCalledFunction(); - if (Callee && Callee->hasExternalLinkage()) { - // Look for calls with well-known names. - if (Callee->getName() == "__half_powrf4") - IsHalfPowr = true; - } - } - if (IsHalfPowr) - HalfPowrs.push_back(I); - // We're looking for sequences of up to three such calls, which we'll - // simplify as a group. - if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) { - I = InlineHalfPowrs(HalfPowrs, I); - E = I->getParent()->end(); - HalfPowrs.clear(); - Changed = true; - } - } - assert(HalfPowrs.empty() && "Block had no terminator!"); - } - - return Changed; -} diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 9f136d4..6247b03 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -49,6 +49,7 @@ class LibCallOptimization { protected: Function *Caller; const TargetData *TD; + const TargetLibraryInfo *TLI; LLVMContext* Context; public: LibCallOptimization() { } @@ -62,9 +63,11 @@ public: virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; - Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) { + Value *OptimizeCall(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; + this->TLI = TLI; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); @@ -97,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { } return true; } + +static bool CallHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} /// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality /// comparisons with With. @@ -1075,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization { // 'printf' Optimizations struct PrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require one fixed pointer argument and an integer/void result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || - FT->getReturnType()->isVoidTy())) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) @@ -1138,20 +1144,40 @@ struct PrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. 
+    if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *IPrintFFn =
+        M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(IPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
 };
 
 //===---------------------------------------===//
 // 'sprintf' Optimizations
 
 struct SPrintFOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require two fixed pointer arguments and an integer result.
-    const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isPointerTy() ||
-        !FT->getReturnType()->isIntegerTy())
-      return 0;
-
+  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
     // Check for a fixed format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1212,6 +1238,32 @@ struct SPrintFOpt : public LibCallOptimization {
     }
     return 0;
   }
+
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer arguments and an integer result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+    // point arguments.
+    if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *SIPrintFFn =
+        M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(SIPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
 };
 
 //===---------------------------------------===//
@@ -1278,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization {
 // 'fprintf' Optimizations
 
 struct FPrintFOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require two fixed paramters as pointers and integer result.
-    const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isPointerTy() ||
-        !FT->getReturnType()->isIntegerTy())
-      return 0;
-
+  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
     // All the optimizations depend on the format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1330,6 +1376,32 @@ struct FPrintFOpt : public LibCallOptimization {
     }
     return 0;
   }
+
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed parameters as pointers and integer result.
+    const FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+    // floating point arguments.
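All three printf-family rewrites in this file share one mechanism; only the probed LibFunc entry and the replacement name differ (the fprintf version of the guard continues just below). A distilled, hedged form of the shared pattern; retargetToIntVariant is an illustrative name, not a helper in this tree:

```cpp
// Clone the original call (keeping operands and attributes) and point it at
// an integer-only variant declared with the same prototype.
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"

using namespace llvm;

static CallInst *retargetToIntVariant(CallInst *CI, const char *NewName,
                                      IRBuilder<> &B) {
  Function *Callee = CI->getCalledFunction();
  Module *M = B.GetInsertBlock()->getParent()->getParent();
  Constant *NewFn =
    M->getOrInsertFunction(NewName, Callee->getFunctionType(),
                           Callee->getAttributes());
  CallInst *New = cast<CallInst>(CI->clone());  // same args, same attributes
  New->setCalledFunction(NewFn);
  B.Insert(New);                                // place it at the old call
  return New;
}
```

The TLI->has(...) guard matters because iprintf, siprintf and fiprintf are not standard C; only targets whose runtime library provides them (typically small embedded C libraries such as newlib) may take the rewrite.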
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1544,8 +1616,11 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { // Set the builder to the instruction after the call. Builder.SetInsertPoint(BB, I); + // Use debug location of CI for all new instructions. + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Try to optimize this call. - Value *Result = LCO->OptimizeCall(CI, TD, Builder); + Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder); if (Result == 0) continue; DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI; diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5b6bc04..539cc6f 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -36,7 +36,7 @@ // evaluated each time through the tail recursion. Safely keeping allocas // in the entry block requires analysis to proves that the tail-called // function does not read or write the stack object. -// 2. Tail recursion is only performed if the call immediately preceeds the +// 2. Tail recursion is only performed if the call immediately precedes the // return instruction. It's possible that there could be a jump between // the call and the return. // 3. There can be intervening operations between the call and the return that @@ -433,7 +433,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed + // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = @@ -496,7 +496,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { - PHINode *PN = PHINode::Create(I->getType(), + PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); @@ -527,8 +527,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. + pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); PHINode *AccPN = PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + std::distance(PB, PE) + 1, "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the @@ -537,8 +539,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // other tail recursions eliminated) the accumulator is not modified. // Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. 
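The accumulator logic being wired up in TailRecursionElimination here (the loop over OldEntry's predecessors resumes below) is easiest to see on a source-level example. This is an assumed illustration of the transformation's effect, not compiler output:

```cpp
// Before: "n * fact(n - 1)" is not a tail call, because the multiply runs
// after the recursive call returns.
int fact(int n) { return n <= 1 ? 1 : n * fact(n - 1); }

// Conceptually after accumulator recursion elimination: the pending
// multiplies are folded into "acc", playing the role of the accumulator PHI
// seeded with AccumulatorRecursionEliminationInitVal (here, 1).
int fact_loop(int n) {
  int acc = 1;
  while (n > 1) {
    acc *= n;       // the formerly pending operation
    --n;
  }
  return acc;
}
```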
- for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); - PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if (P == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); @@ -572,7 +573,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. - BranchInst::Create(OldEntry, Ret); + BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); + NewBI->setDebugLoc(CI->getDebugLoc()); + BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index acaea19..c705cc5 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -447,7 +447,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = - PHINode::Create(PN->getType(), PN->getName()+".ph", BI); + PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. @@ -538,3 +538,15 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, UncondBranch->eraseFromParent(); return cast<ReturnInst>(NewRet); } + +/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a +/// given basic block. +DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) { + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + DebugLoc DL = BI->getDebugLoc(); + if (!DL.isUnknown()) + return DL; + } + return DebugLoc(); +} diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 616b066..caf2aeb 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -56,7 +56,7 @@ char BreakCriticalEdges::ID = 0; INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", "Break critical edges in CFG", false, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; FunctionPass *llvm::createBreakCriticalEdgesPass() { return new BreakCriticalEdges(); @@ -140,7 +140,7 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, if (VP->getParent() == SplitBB) continue; // Otherwise a new PHI is needed. Create one and populate it. - PHINode *NewPN = PHINode::Create(PN->getType(), "split", + PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split", SplitBB->getTerminator()); for (unsigned i = 0, e = Preds.size(); i != e; ++i) NewPN->addIncoming(V, Preds[i]); diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index e633772..8c133ea 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -104,7 +104,7 @@ namespace { /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. 
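A recurring theme of this import, visible in the TailRecursionElimination and BasicBlockUtils hunks above, is keeping DebugLoc information alive when instructions are replaced (the CodeExtractor function the comment above introduces follows below). Here is a hedged sketch combining the two idioms, assuming GetFirstDebugLocInBasicBlock is declared in BasicBlockUtils.h as its definition above suggests:

```cpp
#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Give a replacement instruction the old one's location, or borrow the
// first known location in the block when the old one had none.
static void inheritDebugLoc(Instruction *New, Instruction *Old) {
  DebugLoc DL = Old->getDebugLoc();
  if (DL.isUnknown())
    DL = GetFirstDebugLocInBasicBlock(Old->getParent());
  New->setDebugLoc(DL);
}
```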
void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { - bool HasPredsFromRegion = false; + unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { @@ -116,7 +116,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (BlocksToExtract.count(PN->getIncomingBlock(i))) - HasPredsFromRegion = true; + ++NumPredsFromRegion; else ++NumPredsOutsideRegion; @@ -147,7 +147,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. - if (HasPredsFromRegion) { + if (NumPredsFromRegion) { PHINode *PN = cast<PHINode>(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. @@ -157,14 +157,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { TI->replaceUsesOfWith(OldPred, NewBB); } - // Okay, everthing within the region is now branching to the right block, we + // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast<PHINode>(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".ce", - NewBB->begin()); + PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, + PN->getName()+".ce", NewBB->begin()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index c1faf24..7d17909 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -320,7 +320,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // // Note that this only does one level of inlining. For example, if the // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now -// exists in the instruction stream. Similiarly this will inline a recursive +// exists in the instruction stream. Similarly this will inline a recursive // function by one level. // bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { @@ -624,7 +624,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { - PHI = PHINode::Create(RTy, TheCall->getName(), + PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. 
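Nearly every file in this import touches the same API change: PHINode::Create now takes the expected number of incoming values up front, replacing the old create-then-reserveOperandSpace pattern, so the operand list is allocated once at the right size. A hypothetical call site in the new style:

```cpp
#include "llvm/Instructions.h"
#include "llvm/Support/CFG.h"
#include <iterator>

using namespace llvm;

PHINode *createMergePHI(const Type *Ty, BasicBlock *BB) {
  // Count the predecessors first, as the MergeEmptyReturnBlocks and
  // TailCallElim hunks above do with std::distance.
  unsigned NumPreds = std::distance(pred_begin(BB), pred_end(BB));
  // Old style: PN = PHINode::Create(Ty, "merge", InsertPt);
  //            PN->reserveOperandSpace(NumPreds);
  // New style (assumes BB already has a first instruction to insert before):
  return PHINode::Create(Ty, NumPreds, "merge", &BB->front());
}
```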
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index b2e5fa6..b654111 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -207,6 +207,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, DomTreeNode *DomNode = DT->getNode(DomBB); + SmallVector<PHINode*, 16> AddedPHIs; + SSAUpdater SSAUpdate; SSAUpdate.Initialize(Inst->getType(), Inst->getName()); @@ -220,9 +222,10 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa", + PHINode *PN = PHINode::Create(Inst->getType(), + PredCache.GetNumPreds(ExitBB), + Inst->getName()+".lcssa", ExitBB->begin()); - PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB)); // Add inputs from inside the loop for this PHI. for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { @@ -236,6 +239,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, &PN->getOperandUse( PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); } + + AddedPHIs.push_back(PN); // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); @@ -262,6 +267,12 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UsesToRewrite[i]); } + + // Remove PHI nodes that did not have any uses rewritten. + for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { + if (AddedPHIs[i]->use_empty()) + AddedPHIs[i]->eraseFromParent(); + } return true; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 3f789fa..4bca2fc 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -20,8 +20,11 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -65,8 +68,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { // Let the basic block know that we are letting go of it. Based on this, // it will adjust it's PHI nodes. - assert(BI->getParent() && "Terminator not inserted in block!"); - OldDest->removePredecessor(BI->getParent()); + OldDest->removePredecessor(BB); // Replace the conditional branch with an unconditional one. BranchInst::Create(Destination, BI); @@ -209,8 +211,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { bool llvm::isInstructionTriviallyDead(Instruction *I) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; - // We don't want debug info removed by anything this general. - if (isa<DbgInfoIntrinsic>(I)) return false; + // We don't want debug info removed by anything this general, unless + // debug info is empty. 
+  if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
+    if (DDI->getAddress())
+      return false;
+    return true;
+  }
+  if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
+    if (DVI->getValue())
+      return false;
+    return true;
+  }
 
   if (!I->mayHaveSideEffects()) return true;
 
@@ -320,8 +332,14 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
       BI = BB->begin();
       continue;
     }
-
+
+    if (Inst->isTerminator())
+      break;
+
+    WeakVH BIHandle(BI);
     MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
+    if (BIHandle != BI)
+      BI = BB->begin();
   }
   return MadeChange;
 }
@@ -632,6 +650,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
       Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
       Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
     }
+    // Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
+    Hash >>= 1;
     // If we've never seen this hash value before, it's a unique PHI.
     std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
       HashMap.insert(std::make_pair(Hash, PN));
@@ -753,3 +773,83 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
   return Align;
 }
+
+///===---------------------------------------------------------------------===//
+///  Dbg Intrinsic utilities
+///
+
+/// Inserts an llvm.dbg.value intrinsic before each store to an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                           StoreInst *SI, DIBuilder &Builder) {
+  DIVariable DIVar(DDI->getVariable());
+  if (!DIVar.Verify())
+    return false;
+
+  Instruction *DbgVal =
+    Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0,
+                                    DIVar, SI);
+
+  // Propagate any debug metadata from the store onto the dbg.value.
+  DebugLoc SIDL = SI->getDebugLoc();
+  if (!SIDL.isUnknown())
+    DbgVal->setDebugLoc(SIDL);
+  // Otherwise propagate debug metadata from dbg.declare.
+  else
+    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  return true;
+}
+
+/// Inserts an llvm.dbg.value intrinsic before each load of an alloca'd value
+/// that has an associated llvm.dbg.declare intrinsic.
+bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+                                           LoadInst *LI, DIBuilder &Builder) {
+  DIVariable DIVar(DDI->getVariable());
+  if (!DIVar.Verify())
+    return false;
+
+  Instruction *DbgVal =
+    Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
+                                    DIVar, LI);
+
+  // Propagate any debug metadata from the load onto the dbg.value.
+  DebugLoc LIDL = LI->getDebugLoc();
+  if (!LIDL.isUnknown())
+    DbgVal->setDebugLoc(LIDL);
+  // Otherwise propagate debug metadata from dbg.declare.
+  else
+    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  return true;
+}
+
+/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
+/// of llvm.dbg.value intrinsics.
+bool llvm::LowerDbgDeclare(Function &F) { + DIBuilder DIB(*F.getParent()); + SmallVector<DbgDeclareInst *, 4> Dbgs; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + Dbgs.push_back(DDI); + } + if (Dbgs.empty()) + return false; + + for (SmallVector<DbgDeclareInst *, 4>::iterator I = Dbgs.begin(), + E = Dbgs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + bool RemoveDDI = true; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + else + RemoveDDI = false; + if (RemoveDDI) + DDI->eraseFromParent(); + } + } + return true; +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 2462630..f02ffd2 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -115,7 +115,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", true, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } @@ -648,9 +648,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // the backedge block which correspond to any PHI nodes in the header block. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be", - BETerminator); - NewPN->reserveOperandSpace(BackedgeBlocks.size()); + PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), + PN->getName()+".be", BETerminator); if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 914a439..ed733d3 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -84,7 +84,7 @@ char LowerSwitch::ID = 0; INITIALIZE_PASS(LowerSwitch, "lowerswitch", "Lower SwitchInst's to branches", false, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; // createLowerSwitchPass - Interface to this file... 
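The LowerDbgDeclare utility defined above gives passes a one-call way to migrate a function from address-based to value-based debug info; note it only erases a dbg.declare whose alloca is used purely by plain loads and stores (the LowerSwitch factory whose comment ends just above continues below). An assumed usage from a custom pass, with the declaration presumed to live in Transforms/Utils/Local.h:

```cpp
#include "llvm/Function.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Attach dbg.value intrinsics to the loads and stores of every alloca that
// carries a dbg.declare; returns false only if no dbg.declare was found.
static bool migrateDebugInfo(Function &F) {
  return LowerDbgDeclare(F);
}
```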
FunctionPass *llvm::createLowerSwitchPass() { diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 7788857..50c9ae2 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -45,7 +46,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" #include <algorithm> -#include <map> #include <queue> using namespace llvm; @@ -103,7 +103,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { /// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the /// alloca 'V', if any. static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { - if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) for (Value::use_iterator UI = DebugNode->use_begin(), E = DebugNode->use_end(); UI != E; ++UI) if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) @@ -273,8 +273,6 @@ namespace { LargeBlockInfo &LBI); void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI); - void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, @@ -391,7 +389,9 @@ void PromoteMem2Reg::run() { if (Info.UsingBlocks.empty()) { // Record debuginfo for the store and remove the declaration's debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); + if (!DIB) + DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); DDI->eraseFromParent(); } // Remove the (now dead) store and alloca. @@ -423,8 +423,11 @@ void PromoteMem2Reg::run() { while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->use_back()); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = Info.DbgDeclare) - ConvertDebugDeclareToDebugValue(DDI, SI); + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } SI->eraseFromParent(); LBI.deleteValue(SI); } @@ -944,28 +947,6 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, } } -// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value -// that has an associated llvm.dbg.decl intrinsic. -void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, - StoreInst *SI) { - DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) - return; - - if (!DIB) - DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); - Instruction *DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, - DIVar, SI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc SIDL = SI->getDebugLoc(); - if (!SIDL.isUnknown()) - DbgVal->setDebugLoc(SIDL); - // Otherwise propagate debug metadata from dbg.declare. 
- else - DbgVal->setDebugLoc(DDI->getDebugLoc()); -} - // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific // Alloca returns true if there wasn't already a phi-node for that variable // @@ -979,12 +960,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. - PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), Allocas[AllocaNo]->getName() + "." + Twine(Version++), BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; - PN->reserveOperandSpace(getNumPreds(BB)); if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); @@ -1076,8 +1056,11 @@ NextIteration: // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) - ConvertDebugDeclareToDebugValue(DDI, SI); + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } BB->getInstList().erase(SI); } } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 3896d98..2860c3e 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -20,8 +21,10 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" + using namespace llvm; typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; @@ -170,8 +173,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { } // Ok, we have no way out, insert a new one now. - PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front()); - InsertedPHI->reserveOperandSpace(PredValues.size()); + PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), + ProtoName, &BB->front()); // Fill in all the predecessors of the PHI. for (unsigned i = 0, e = PredValues.size(); i != e; ++i) @@ -184,6 +187,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return V; } + // Set DebugLoc. + InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB)); + // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); @@ -289,9 +295,8 @@ public: /// Reserve space for the operands but do not fill them in yet. 
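For context on the GetValueInMiddleOfBlock and CreateEmptyPHI changes here (the latter's definition follows below): SSAUpdater clients never build PHIs themselves; they register available definitions and ask for the live value at a point. A hedged minimal client:

```cpp
#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

// Ask for the value of Def that is live on entry to UseBB; any PHIs needed
// along the way are created internally, pre-sized and (after this import)
// tagged with a block-level DebugLoc.
static Value *liveValueAt(Instruction *Def, BasicBlock *UseBB) {
  SSAUpdater SSA;
  SSA.Initialize(Def->getType(), Def->getName());
  SSA.AddAvailableValue(Def->getParent(), Def);
  return SSA.GetValueInMiddleOfBlock(UseBB);
}
```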
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, SSAUpdater *Updater) { - PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName, - &BB->front()); - PHI->reserveOperandSpace(NumPreds); + PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, + Updater->ProtoName, &BB->front()); return PHI; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index c670885..18b8573 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -37,6 +37,10 @@ #include <map> using namespace llvm; +static cl::opt<unsigned> +PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), + cl::desc("Control the amount of phi node folding to perform (default = 1)")); + static cl::opt<bool> DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); @@ -201,11 +205,20 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, /// which works well enough for us. /// /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to -/// see if V (which must be an instruction) is cheap to compute and is -/// non-trapping. If both are true, the instruction is inserted into the set -/// and true is returned. +/// see if V (which must be an instruction) and its recursive operands +/// that do not dominate BB have a combined cost lower than CostRemaining and +/// are non-trapping. If both are true, the instruction is inserted into the +/// set and true is returned. +/// +/// The cost for most non-trapping instructions is defined as 1 except for +/// Select whose cost is 2. +/// +/// After this function returns, CostRemaining is decreased by the cost of +/// V plus its non-dominating operands. If that cost is greater than +/// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, - SmallPtrSet<Instruction*, 4> *AggressiveInsts) { + SmallPtrSet<Instruction*, 4> *AggressiveInsts, + unsigned &CostRemaining) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -232,12 +245,17 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // instructions in the 'if region'. if (AggressiveInsts == 0) return false; + // If we have seen this instruction before, don't count it again. + if (AggressiveInsts->count(I)) return true; + // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. if (!I->isSafeToSpeculativelyExecute()) return false; + unsigned Cost = 0; + switch (I->getOpcode()) { default: return false; // Cannot hoist this out safely. case Instruction::Load: @@ -246,11 +264,13 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // predecessor. if (PBB->getFirstNonPHIOrDbg() != I) return false; + Cost = 1; break; case Instruction::GetElementPtr: // GEPs are cheap if all indices are constant. 
if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices()) return false; + Cost = 1; break; case Instruction::Add: case Instruction::Sub: @@ -261,13 +281,26 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, case Instruction::LShr: case Instruction::AShr: case Instruction::ICmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + Cost = 1; break; // These are all cheap and non-trapping instructions. + + case Instruction::Select: + Cost = 2; + break; } - // Okay, we can only really hoist these out if their operands are not - // defined in the conditional region. + if (Cost > CostRemaining) + return false; + + CostRemaining -= Cost; + + // Okay, we can only really hoist these out if their operands do + // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, 0)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -807,12 +840,16 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { BasicBlock::iterator BB2_Itr = BB2->begin(); Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; - while (isa<DbgInfoIntrinsic>(I1)) - I1 = BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = BB2_Itr++; - if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || - !I1->isIdenticalToWhenDefined(I2) || + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) || (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; @@ -835,13 +872,17 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I2->eraseFromParent(); I1 = BB1_Itr++; - while (isa<DbgInfoIntrinsic>(I1)) - I1 = BB1_Itr++; I2 = BB2_Itr++; - while (isa<DbgInfoIntrinsic>(I2)) - I2 = BB2_Itr++; - } while (I1->getOpcode() == I2->getOpcode() && - I1->isIdenticalToWhenDefined(I2)); + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa<DbgInfoIntrinsic>(I1)) + I1 = BB1_Itr++; + while (isa<DbgInfoIntrinsic>(I2)) + I2 = BB2_Itr++; + } + } while (I1->isIdenticalToWhenDefined(I2)); return true; @@ -1209,6 +1250,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. 
SmallPtrSet<Instruction*, 4> AggressiveInsts; + unsigned MaxCostVal0 = PHINodeFoldingThreshold, + MaxCostVal1 = PHINodeFoldingThreshold; for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); @@ -1218,8 +1261,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { continue; } - if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) || - !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts)) + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, + MaxCostVal0) || + !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, + MaxCostVal1)) return false; } @@ -1393,24 +1438,23 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { return true; } -/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, -/// and if a predecessor branches to us and one of our successors, fold the -/// setcc into the predecessor and use logical operations to pick the right -/// destination. +/// FoldBranchToCommonDest - If this basic block is simple enough, and if a +/// predecessor branches to us and one of our successors, fold the block into +/// the predecessor and use logical operations to pick the right destination. bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; - + // Only allow this if the condition is a simple instruction that can be // executed unconditionally. It must be in the same block as the branch, and // must be at the front of the block. BasicBlock::iterator FrontIt = BB->front(); + // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(FrontIt)) - ++FrontIt; + while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; // Allow a single instruction to be hoisted in addition to the compare // that feeds the branch. We later ensure that any values that _it_ uses @@ -1422,21 +1466,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { FrontIt->isSafeToSpeculativelyExecute()) { BonusInst = &*FrontIt; ++FrontIt; + + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; } - + // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; + // Ignore dbg intrinsics. - while(isa<DbgInfoIntrinsic>(CondIt)) - ++CondIt; - if (&*CondIt != BI) { - assert (!isa<DbgInfoIntrinsic>(CondIt) && "Hey do not forget debug info!"); + while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; + + if (&*CondIt != BI) return false; - } // Cond is known to be a compare or binary operator. Check to make sure that // neither operand is a potentially-trapping constant expression. @@ -1447,13 +1493,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (CE->canTrap()) return false; - // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = BI->getSuccessor(1); if (TrueDest == BB || FalseDest == BB) return false; - + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); @@ -1461,10 +1506,24 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Check that we have two conditional branches.
If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - if (PBI == 0 || PBI->isUnconditional() || - !SafeToMergeTerminators(BI, PBI)) + if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) continue; + // Determine if the two branches share a common destination. + Instruction::BinaryOps Opc; + bool InvertPredCond = false; + + if (PBI->getSuccessor(0) == TrueDest) + Opc = Instruction::Or; + else if (PBI->getSuccessor(1) == FalseDest) + Opc = Instruction::And; + else if (PBI->getSuccessor(0) == FalseDest) + Opc = Instruction::And, InvertPredCond = true; + else if (PBI->getSuccessor(1) == TrueDest) + Opc = Instruction::Or, InvertPredCond = true; + else + continue; + // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values // must already have been resolved, so we won't be inhibiting the @@ -1502,20 +1561,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (!UsedValues.empty()) return false; } - - Instruction::BinaryOps Opc; - bool InvertPredCond = false; - - if (PBI->getSuccessor(0) == TrueDest) - Opc = Instruction::Or; - else if (PBI->getSuccessor(1) == FalseDest) - Opc = Instruction::And; - else if (PBI->getSuccessor(0) == FalseDest) - Opc = Instruction::And, InvertPredCond = true; - else if (PBI->getSuccessor(1) == TrueDest) - Opc = Instruction::Or, InvertPredCond = true; - else - continue; DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); @@ -1566,6 +1611,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { AddPredecessorToBlock(FalseDest, PredBlock, BB); PBI->setSuccessor(1, FalseDest); } + + // Copy any debug value intrinsics into the end of PredBlock. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isa<DbgInfoIntrinsic>(*I)) + I->clone()->insertBefore(PBI); + return true; } return false; @@ -1598,13 +1649,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. if (BlockIsSimpleEnoughToThreadThrough(BB)) { + pred_iterator PB = pred_begin(BB), PE = pred_end(BB); PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), + std::distance(PB, PE), BI->getCondition()->getName() + ".pr", BB->begin()); // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI && PBI->isConditional() && @@ -1800,6 +1853,26 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, return true; } +// SimplifySwitchOnSelect - Replaces +// (switch (select cond, X, Y)) on constant X, Y +// with a branch - conditional if X and Y lead to distinct BBs, +// unconditional otherwise. +static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { + // Check for constant integer values in the select. + ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue()); + ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue()); + if (!TrueVal || !FalseVal) + return false; + + // Find the relevant condition and destinations. 
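// As an illustration of SimplifySwitchOnSelect (a hypothetical IR sketch):
//   %sel = select i1 %cond, i32 1, i32 4
//   switch i32 %sel, label %dflt [ i32 1, label %bb1
//                                  i32 4, label %bb4 ]
// becomes:
//   br i1 %cond, label %bb1, label %bb4
// or an unconditional branch when both constants reach the same block.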
+ Value *Condition = Select->getCondition(); + BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal)); + BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal)); + + // Perform the actual simplification. + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); +} + // SimplifyIndirectBrOnSelect - Replaces // (indirectbr (select cond, blockaddress(@fn, BlockA), // blockaddress(@fn, BlockB))) @@ -2148,7 +2221,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { if (LI->isVolatile()) break; - // Delete this instruction + // Delete this instruction (any uses are guaranteed to be dead) + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); BBI->eraseFromParent(); Changed = true; } @@ -2189,17 +2264,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // If the default value is unreachable, figure out the most popular // destination and make it the default. if (SI->getSuccessor(0) == BB) { - std::map<BasicBlock*, unsigned> Popularity; - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - Popularity[SI->getSuccessor(i)]++; - + std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { + std::pair<unsigned, unsigned>& entry = + Popularity[SI->getSuccessor(i)]; + if (entry.first == 0) { + entry.first = 1; + entry.second = i; + } else { + entry.first++; + } + } + // Find the most popular block. unsigned MaxPop = 0; + unsigned MaxIndex = 0; BasicBlock *MaxBlock = 0; - for (std::map<BasicBlock*, unsigned>::iterator + for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second > MaxPop) { - MaxPop = I->second; + if (I->second.first > MaxPop || + (I->second.first == MaxPop && MaxIndex > I->second.second)) { + MaxPop = I->second.first; + MaxIndex = I->second.second; MaxBlock = I->first; } } @@ -2309,7 +2395,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) { if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred)) return SimplifyCFG(BB) | true; - + + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB) | true; + // If the block only contains the switch, see if we can fold the block // away into any preds. BasicBlock::iterator BBI = BB->begin(); diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index ccb8287..46d4ada 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -116,7 +116,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { ReturnInst::Create(F.getContext(), NULL, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... 
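// A worked example for the SimplifyUnreachable popularity change above (a
// sketch): for case successors [A, B, A, C] the tracked pairs are A:(2, 1),
// B:(1, 2) and C:(1, 4), so A becomes the new default destination; on a
// count tie, the block whose case appears earliest in the switch wins,
// keeping the result deterministic instead of depending on map iteration
// order.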
- PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal"); + PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); ReturnInst::Create(F.getContext(), PN, NewRetBlock); } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index f5481d3..a73bf04 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -39,7 +39,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, return VM[V] = const_cast<Value*>(V); // Create a dummy node in case we have a metadata cycle. - MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0); + MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>()); VM[V] = Dummy; // Check all operands to see if any need to be remapped. @@ -54,7 +54,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, Value *Op = MD->getOperand(i); Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0); } - MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size()); + MDNode *NewMD = MDNode::get(V->getContext(), Elts); Dummy->replaceAllUsesWith(NewMD); VM[V] = NewMD; MDNode::deleteTemporary(Dummy); diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index cbc874a..844284d 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -39,9 +40,13 @@ #include "llvm/Support/FormattedStream.h" #include <algorithm> #include <cctype> -#include <map> using namespace llvm; +static cl::opt<bool> +EnableDebugInfoComment("enable-debug-info-comment", cl::Hidden, + cl::desc("Enable debug info comments")); + + // Make virtual table appear in this compilation unit. AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {} @@ -89,7 +94,7 @@ enum PrefixType { /// prefixed with % (if the string only contains simple characters) or is /// surrounded with ""'s (if it has special chars in it). Print it out. static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) { - assert(Name.data() && "Cannot get empty name!"); + assert(!Name.empty() && "Cannot get empty name!"); switch (Prefix) { default: llvm_unreachable("Bad prefix!"); case NoPrefix: break; @@ -1075,7 +1080,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (CE->hasIndices()) { - const SmallVector<unsigned, 4> &Indices = CE->getIndices(); + ArrayRef<unsigned> Indices = CE->getIndices(); for (unsigned i = 0, e = Indices.size(); i != e; ++i) Out << ", " << Indices[i]; } @@ -1338,9 +1343,12 @@ void AssemblyWriter::printModule(const Module *M) { CurPos = NewLine+1; NewLine = Asm.find_first_of('\n', CurPos); } - Out << "module asm \""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out); - Out << "\"\n"; + std::string rest(Asm.begin()+CurPos, Asm.end()); + if (!rest.empty()) { + Out << "module asm \""; + PrintEscapedString(rest, Out); + Out << "\"\n"; + } } // Loop over the dependent libraries and emit them. 
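The MDNode construction APIs in this import move from pointer/length pairs to ArrayRef<Value*>, as the ValueMapper hunk above shows. A minimal sketch of the new style, where Ctx and V are hypothetical:

    Value *Elts[] = { V };        // converts implicitly to ArrayRef<Value*>
    MDNode *N = MDNode::get(Ctx, Elts);
    MDNode *Tmp = MDNode::getTemporary(Ctx, ArrayRef<Value*>()); // empty node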
@@ -1581,8 +1589,8 @@ void AssemblyWriter::printFunction(const Function *F) { case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break; case CallingConv::MSP430_INTR: Out << "msp430_intrcc "; break; - case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break; - case CallingConv::PTX_Device: Out << "ptx_device"; break; + case CallingConv::PTX_Kernel: Out << "ptx_kernel "; break; + case CallingConv::PTX_Device: Out << "ptx_device "; break; default: Out << "cc" << F->getCallingConv() << " "; break; } @@ -1727,6 +1735,18 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out); } +/// printDebugLoc - Print DebugLoc. +static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { + OS << DL.getLine() << ":" << DL.getCol(); + if (MDNode *N = DL.getInlinedAt(getGlobalContext())) { + DebugLoc IDL = DebugLoc::getFromDILocation(N); + if (!IDL.isUnknown()) { + OS << "@"; + printDebugLoc(IDL,OS); + } + } +} + /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. /// @@ -1734,6 +1754,43 @@ void AssemblyWriter::printInfoComment(const Value &V) { if (AnnotationWriter) { AnnotationWriter->printInfoComment(V, Out); return; + } else if (EnableDebugInfoComment) { + bool Padded = false; + if (const Instruction *I = dyn_cast<Instruction>(&V)) { + const DebugLoc &DL = I->getDebugLoc(); + if (!DL.isUnknown()) { + if (!Padded) { + Out.PadToColumn(50); + Padded = true; + Out << ";"; + } + Out << " [debug line = "; + printDebugLoc(DL,Out); + Out << "]"; + } + if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) { + const MDNode *Var = DDI->getVariable(); + if (!Padded) { + Out.PadToColumn(50); + Padded = true; + Out << ";"; + } + if (Var && Var->getNumOperands() >= 2) + if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2))) + Out << " [debug variable = " << MDS->getString() << "]"; + } + else if (const DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) { + const MDNode *Var = DVI->getVariable(); + if (!Padded) { + Out.PadToColumn(50); + Padded = true; + Out << ";"; + } + if (Var && Var->getNumOperands() >= 2) + if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2))) + Out << " [debug variable = " << MDS->getString() << "]"; + } + } } } diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index b323540..4541f38 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -84,7 +84,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.compare(14, 5, "vsubl", 5) == 0 || Name.compare(14, 5, "vaddw", 5) == 0 || Name.compare(14, 5, "vsubw", 5) == 0 || - Name.compare(14, 5, "vmull", 5) == 0 || Name.compare(14, 5, "vmlal", 5) == 0 || Name.compare(14, 5, "vmlsl", 5) == 0 || Name.compare(14, 5, "vabdl", 5) == 0 || @@ -528,6 +527,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // or 0. NewFn = 0; return true; + } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 || + Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 || + Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) { + // Calls to these instructions are transformed into unaligned loads. + NewFn = 0; + return true; } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) { // This is an SSE/MMX instruction. 
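// A note on the AsmWriter printInfoComment change above: with the new
// hidden -enable-debug-info-comment flag, the printer pads to column 50 and
// appends annotations, e.g. (hypothetical output):
//   %x = add i32 %a, %b                             ; [debug line = 12:7]
// where an inlined location prints recursively as 12:7@3:1.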
const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext()); @@ -947,7 +952,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Remove upgraded instruction. CI->eraseFromParent(); - + + } else if (F->getName() == "llvm.x86.sse.loadu.ps" || + F->getName() == "llvm.x86.sse2.loadu.dq" || + F->getName() == "llvm.x86.sse2.loadu.pd") { + // Convert to a native, unaligned load. + const Type *VecTy = CI->getType(); + const Type *IntTy = IntegerType::get(C, 128); + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + Value *BC = Builder.CreateBitCast(CI->getArgOperand(0), + PointerType::getUnqual(IntTy), + "cast"); + LoadInst *LI = Builder.CreateLoad(BC, CI->getName()); + LI->setAlignment(1); // Unaligned load. + BC = Builder.CreateBitCast(LI, VecTy, "new.cast"); + + // Fix up all the uses with our new load. + if (!CI->use_empty()) + CI->replaceAllUsesWith(BC); + + // Remove intrinsic. + CI->eraseFromParent(); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } @@ -1180,74 +1207,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { break; } -#if 0 - case Intrinsic::x86_mmx_cvtsi32_si64: { - // The return type needs to be changed. - Value *Operands[1]; - Operands[0] = CI->getArgOperand(0); - ConstructNewCallInst(NewFn, CI, Operands, 1); - break; - } - case Intrinsic::x86_mmx_cvtsi64_si32: { - Value *Operands[1]; - - // Cast the operand to the X86 MMX type. - Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - - ConstructNewCallInst(NewFn, CI, Operands, 1); - break; - } - case Intrinsic::x86_mmx_vec_init_b: - case Intrinsic::x86_mmx_vec_init_w: - case Intrinsic::x86_mmx_vec_init_d: { - // The return type needs to be changed. - Value *Operands[8]; - unsigned NumOps = 0; - - switch (NewFn->getIntrinsicID()) { - default: break; - case Intrinsic::x86_mmx_vec_init_b: NumOps = 8; break; - case Intrinsic::x86_mmx_vec_init_w: NumOps = 4; break; - case Intrinsic::x86_mmx_vec_init_d: NumOps = 2; break; - } - - switch (NewFn->getIntrinsicID()) { - default: break; - case Intrinsic::x86_mmx_vec_init_b: - Operands[7] = CI->getArgOperand(7); - Operands[6] = CI->getArgOperand(6); - Operands[5] = CI->getArgOperand(5); - Operands[4] = CI->getArgOperand(4); - // FALLTHRU - case Intrinsic::x86_mmx_vec_init_w: - Operands[3] = CI->getArgOperand(3); - Operands[2] = CI->getArgOperand(2); - // FALLTHRU - case Intrinsic::x86_mmx_vec_init_d: - Operands[1] = CI->getArgOperand(1); - Operands[0] = CI->getArgOperand(0); - break; - } - - ConstructNewCallInst(NewFn, CI, Operands, NumOps); - break; - } - case Intrinsic::x86_mmx_vec_ext_d: { - Value *Operands[2]; - - // Cast the operand to the X86 MMX type. 
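// Illustration of the unaligned-load upgrade implemented above; the IR
// below is a hypothetical sketch. A call such as:
//   %v = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %p)
// is rewritten to a native load with alignment 1:
//   %cast = bitcast i8* %p to i128*
//   %v2 = load i128* %cast, align 1
//   %new.cast = bitcast i128 %v2 to <4 x float>
// after which every use of %v is replaced with %new.cast.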
- Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - Operands[1] = CI->getArgOperand(1); - - ConstructNewCallInst(NewFn, CI, Operands, 2); - break; - } -#endif - case Intrinsic::ctlz: case Intrinsic::ctpop: case Intrinsic::cttz: { diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 1abd031..6bde263 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMCore Constants.cpp Core.cpp DebugLoc.cpp + DebugInfoProbe.cpp Dominators.cpp Function.cpp GVMaterializer.cpp diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 573efb7..9985ada 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -24,6 +24,7 @@ #include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" +#include "llvm/Operator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -1735,7 +1736,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, // with a single zero index, it must be nonzero. assert(CE1->getNumOperands() == 2 && !CE1->getOperand(1)->isNullValue() && - "Suprising getelementptr!"); + "Surprising getelementptr!"); return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; } else { // If they are different globals, we don't know what the value is, diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 246fde1..15d7793 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -32,7 +32,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include <algorithm> -#include <map> +#include <cstdarg> using namespace llvm; //===----------------------------------------------------------------------===// @@ -325,27 +325,53 @@ ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V) assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type"); } -ConstantInt* ConstantInt::getTrue(LLVMContext &Context) { +ConstantInt *ConstantInt::getTrue(LLVMContext &Context) { LLVMContextImpl *pImpl = Context.pImpl; if (!pImpl->TheTrueVal) pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1); return pImpl->TheTrueVal; } -ConstantInt* ConstantInt::getFalse(LLVMContext &Context) { +ConstantInt *ConstantInt::getFalse(LLVMContext &Context) { LLVMContextImpl *pImpl = Context.pImpl; if (!pImpl->TheFalseVal) pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0); return pImpl->TheFalseVal; } +Constant *ConstantInt::getTrue(const Type *Ty) { + const VectorType *VTy = dyn_cast<VectorType>(Ty); + if (!VTy) { + assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1."); + return ConstantInt::getTrue(Ty->getContext()); + } + assert(VTy->getElementType()->isIntegerTy(1) && + "True must be vector of i1 or i1."); + SmallVector<Constant*, 16> Splat(VTy->getNumElements(), + ConstantInt::getTrue(Ty->getContext())); + return ConstantVector::get(Splat); +} + +Constant *ConstantInt::getFalse(const Type *Ty) { + const VectorType *VTy = dyn_cast<VectorType>(Ty); + if (!VTy) { + assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1."); + return ConstantInt::getFalse(Ty->getContext()); + } + assert(VTy->getElementType()->isIntegerTy(1) && + "False must be vector of i1 or i1."); + SmallVector<Constant*, 16> Splat(VTy->getNumElements(), + ConstantInt::getFalse(Ty->getContext())); + return ConstantVector::get(Splat); +} + // Get a ConstantInt from an APInt. 
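// A sketch of the type-based getTrue/getFalse added above and of the
// varargs ConstantStruct::get introduced just below; Ctx, C0 and C1 are
// hypothetical:
//   const Type *V4I1 = VectorType::get(Type::getInt1Ty(Ctx), 4);
//   Constant *AllTrue = ConstantInt::getTrue(V4I1);  // <4 x i1> all-true
//   Constant *CS = ConstantStruct::get(Ctx, false, C0, C1, NULL);
// The varargs form requires the trailing NULL terminator.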
Note that the value stored in the DenseMap // as the key, is a DenseMapAPIntKeyInfo::KeyTy which has provided the // operator== and operator!= to ensure that the DenseMap doesn't attempt to // compare APInt's of different widths, which would violate an APInt class // invariant which generates an assertion. -ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) { +ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) { // Get the corresponding integer type for the bit width of the value. const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth()); // get an existing value or the insertion position @@ -355,9 +381,8 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) { return Slot; } -Constant *ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) { - Constant *C = get(cast<IntegerType>(Ty->getScalarType()), - V, isSigned); +Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) { + Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned); // For vectors, broadcast the value. if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) @@ -596,8 +621,6 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str, return get(ATy, ElementVals); } - - ConstantStruct::ConstantStruct(const StructType *T, const std::vector<Constant*> &V) : Constant(T, ConstantStructVal, @@ -644,6 +667,19 @@ Constant *ConstantStruct::get(LLVMContext &Context, return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed); } +Constant* ConstantStruct::get(LLVMContext &Context, bool Packed, + Constant * Val, ...) { + va_list ap; + std::vector<Constant*> Values; + va_start(ap, Val); + while (Val) { + Values.push_back(Val); + Val = va_arg(ap, llvm::Constant*); + } + va_end(ap); + return get(Context, Values, Packed); +} + ConstantVector::ConstantVector(const VectorType *T, const std::vector<Constant*> &V) : Constant(T, ConstantVectorVal, @@ -734,7 +770,7 @@ bool ConstantExpr::hasIndices() const { getOpcode() == Instruction::InsertValue; } -const SmallVector<unsigned, 4> &ConstantExpr::getIndices() const { +ArrayRef<unsigned> ConstantExpr::getIndices() const { if (const ExtractValueConstantExpr *EVCE = dyn_cast<ExtractValueConstantExpr>(this)) return EVCE->Indices; @@ -818,10 +854,10 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { /// operands replaced with the specified values. The specified operands must /// match count and type with the existing ones. Constant *ConstantExpr:: -getWithOperands(Constant *const *Ops, unsigned NumOps) const { - assert(NumOps == getNumOperands() && "Operand count mismatch!"); +getWithOperands(ArrayRef<Constant*> Ops) const { + assert(Ops.size() == getNumOperands() && "Operand count mismatch!"); bool AnyChange = false; - for (unsigned i = 0; i != NumOps; ++i) { + for (unsigned i = 0; i != Ops.size(); ++i) { assert(Ops[i]->getType() == getOperand(i)->getType() && "Operand type mismatch!"); AnyChange |= Ops[i] != getOperand(i); @@ -853,8 +889,8 @@ getWithOperands(Constant *const *Ops, unsigned NumOps) const { return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: return cast<GEPOperator>(this)->isInBounds() ? 
- ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], NumOps-1) : - ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], NumOps-1); + ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], Ops.size()-1) : + ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], Ops.size()-1); case Instruction::ICmp: case Instruction::FCmp: return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]); @@ -2114,7 +2150,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, Constant *Agg = getOperand(0); if (Agg == From) Agg = To; - const SmallVector<unsigned, 4> &Indices = getIndices(); + ArrayRef<unsigned> Indices = getIndices(); Replacement = ConstantExpr::getExtractValue(Agg, &Indices[0], Indices.size()); } else if (getOpcode() == Instruction::InsertValue) { @@ -2123,7 +2159,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, if (Agg == From) Agg = To; if (Val == From) Val = To; - const SmallVector<unsigned, 4> &Indices = getIndices(); + ArrayRef<unsigned> Indices = getIndices(); Replacement = ConstantExpr::getInsertValue(Agg, Val, &Indices[0], Indices.size()); } else if (isCast()) { diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index ffc673f..1395754 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -301,20 +301,18 @@ struct OperandTraits<CompareConstantExpr> : DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value) struct ExprMapKeyType { - typedef SmallVector<unsigned, 4> IndexList; - ExprMapKeyType(unsigned opc, - const std::vector<Constant*> &ops, + ArrayRef<Constant*> ops, unsigned short flags = 0, unsigned short optionalflags = 0, - const IndexList &inds = IndexList()) + ArrayRef<unsigned> inds = ArrayRef<unsigned>()) : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags), - operands(ops), indices(inds) {} + operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {} uint8_t opcode; uint8_t subclassoptionaldata; uint16_t subclassdata; std::vector<Constant*> operands; - IndexList indices; + SmallVector<unsigned, 4> indices; bool operator==(const ExprMapKeyType& that) const { return this->opcode == that.opcode && this->subclassdata == that.subclassdata && @@ -465,7 +463,7 @@ struct ConstantKeyData<ConstantExpr> { CE->isCompare() ? CE->getPredicate() : 0, CE->getRawSubclassOptionalData(), CE->hasIndices() ? - CE->getIndices() : SmallVector<unsigned, 4>()); + CE->getIndices() : ArrayRef<unsigned>()); } }; diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 35c3a2e..92f9440 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -335,7 +335,7 @@ unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) { void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) { StructType *Ty = unwrap<StructType>(StructTy); - for (FunctionType::param_iterator I = Ty->element_begin(), + for (StructType::element_iterator I = Ty->element_begin(), E = Ty->element_end(); I != E; ++I) *Dest++ = wrap(*I); } @@ -543,7 +543,8 @@ LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) { LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, unsigned Count) { - return wrap(MDNode::get(*unwrap(C), unwrap<Value>(Vals, Count), Count)); + return wrap(MDNode::get(*unwrap(C), + ArrayRef<Value*>(unwrap<Value>(Vals, Count), Count))); } LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) { @@ -2082,7 +2083,7 @@ LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op, /*--.. 
Miscellaneous instructions ..........................................--*/ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { - return wrap(unwrap(B)->CreatePHI(unwrap(Ty), Name)); + return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name)); } LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn, diff --git a/lib/VMCore/DebugInfoProbe.cpp b/lib/VMCore/DebugInfoProbe.cpp new file mode 100644 index 0000000..334c3d8 --- /dev/null +++ b/lib/VMCore/DebugInfoProbe.cpp @@ -0,0 +1,258 @@ +//===-- DebugInfoProbe.cpp - DebugInfo Probe ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements DebugInfoProbe. This probe can be used by a pass +// manager to analyze how the optimizer is treating debugging information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "debuginfoprobe" +#include "llvm/DebugInfoProbe.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include <set> +#include <string> + +using namespace llvm; + +static cl::opt<bool> +EnableDebugInfoProbe("enable-debug-info-probe", cl::Hidden, + cl::desc("Enable debug info probe")); + +// CreateInfoOutputFile - Return a file stream to print our output on. +namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } + +//===----------------------------------------------------------------------===// +// DebugInfoProbeImpl - This class implements an interface to monitor +// how an optimization pass is preserving debugging information. + +namespace llvm { + + class DebugInfoProbeImpl { + public: + DebugInfoProbeImpl() : NumDbgLineLost(0), NumDbgValueLost(0) {} + void initialize(StringRef PName, Function &F); + void finalize(Function &F); + void report(); + private: + unsigned NumDbgLineLost, NumDbgValueLost; + std::string PassName; + Function *TheFn; + std::set<unsigned> LineNos; + std::set<MDNode *> DbgVariables; + std::set<Instruction *> MissingDebugLoc; + }; } +//===----------------------------------------------------------------------===// +// DebugInfoProbeImpl + +static void collect(Function &F, std::set<unsigned> &Lines) { + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + const DebugLoc &DL = BI->getDebugLoc(); + unsigned LineNo = 0; + if (!DL.isUnknown()) { + if (MDNode *N = DL.getInlinedAt(F.getContext())) + LineNo = DebugLoc::getFromDILocation(N).getLine(); + else + LineNo = DL.getLine(); + + Lines.insert(LineNo); + } + } +} + +/// initialize - Collect information before running an optimization pass.
+void DebugInfoProbeImpl::initialize(StringRef PName, Function &F) { + if (!EnableDebugInfoProbe) return; + PassName = PName; + + LineNos.clear(); + DbgVariables.clear(); + TheFn = &F; + collect(F, LineNos); + + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + if (BI->getDebugLoc().isUnknown()) + MissingDebugLoc.insert(BI); + if (!isa<DbgInfoIntrinsic>(BI)) continue; + Value *Addr = NULL; + MDNode *Node = NULL; + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { + Addr = DDI->getAddress(); + Node = DDI->getVariable(); + } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { + Addr = DVI->getValue(); + Node = DVI->getVariable(); + } + if (Addr) + DbgVariables.insert(Node); + } +} + +/// report - Report findings. This should be invoked after finalize. +void DebugInfoProbeImpl::report() { + if (!EnableDebugInfoProbe) return; + if (NumDbgLineLost || NumDbgValueLost) { + raw_ostream *OutStream = CreateInfoOutputFile(); + if (NumDbgLineLost) + *OutStream << NumDbgLineLost + << "\t times line number info lost by " + << PassName << "\n"; + if (NumDbgValueLost) + *OutStream << NumDbgValueLost + << "\t times variable info lost by " + << PassName << "\n"; + delete OutStream; + } + NumDbgLineLost = 0; + NumDbgValueLost = 0; +} + +/// finalize - Collect information after running an optimization pass. This +/// must be used after initialization. +void DebugInfoProbeImpl::finalize(Function &F) { + if (!EnableDebugInfoProbe) return; + std::set<unsigned> LineNos2; + collect(F, LineNos2); + assert (TheFn == &F && "Invalid function to measure!"); + + for (std::set<unsigned>::iterator I = LineNos.begin(), + E = LineNos.end(); I != E; ++I) { + unsigned LineNo = *I; + if (LineNos2.count(LineNo) == 0) { + DEBUG(dbgs() + << "DebugInfoProbe(" + << PassName + << "): Losing dbg info for source line " + << LineNo << "\n"); + ++NumDbgLineLost; + } + } + + std::set<MDNode *>DbgVariables2; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + if (BI->getDebugLoc().isUnknown() && + MissingDebugLoc.count(BI) == 0) { + DEBUG(dbgs() << "DebugInfoProbe(" << PassName << "): --- "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + } + if (!isa<DbgInfoIntrinsic>(BI)) continue; + Value *Addr = NULL; + MDNode *Node = NULL; + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { + Addr = DDI->getAddress(); + Node = DDI->getVariable(); + } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { + Addr = DVI->getValue(); + Node = DVI->getVariable(); + } + if (Addr) + DbgVariables2.insert(Node); + } + + for (std::set<MDNode *>::iterator I = DbgVariables.begin(), + E = DbgVariables.end(); I != E; ++I) { + if (DbgVariables2.count(*I) == 0 && (*I)->getNumOperands() >= 2) { + DEBUG(dbgs() + << "DebugInfoProbe(" + << PassName + << "): Losing dbg info for variable: "; + if (MDString *MDS = dyn_cast_or_null<MDString>( + (*I)->getOperand(2))) + dbgs() << MDS->getString(); + else + dbgs() << "..."; + dbgs() << "\n"); + ++NumDbgValueLost; + } + } +} + +//===----------------------------------------------------------------------===// +// DebugInfoProbe + +DebugInfoProbe::DebugInfoProbe() { + pImpl = new DebugInfoProbeImpl(); +} + +DebugInfoProbe::~DebugInfoProbe() { + delete pImpl; +} + +/// initialize - Collect information before running an optimization pass. 
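// Usage sketch: the probe is gated by the hidden flag defined above, e.g.
// with a hypothetical invocation:
//   opt -enable-debug-info-probe -O2 in.bc -o out.bc
// after which report() emits lines of the form:
//   3     times line number info lost by <pass name>
//   1     times variable info lost by <pass name>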
+void DebugInfoProbe::initialize(StringRef PName, Function &F) { + pImpl->initialize(PName, F); +} + +/// finalize - Collect information after running an optimization pass. This +/// must be used after initialization. +void DebugInfoProbe::finalize(Function &F) { + pImpl->finalize(F); +} + +/// report - Report findings. This should be invoked after finalize. +void DebugInfoProbe::report() { + pImpl->report(); +} + +//===----------------------------------------------------------------------===// +// DebugInfoProbeInfo + +/// ~DebugInfoProbeInfo - Report data collected by all probes before deleting +/// them. +DebugInfoProbeInfo::~DebugInfoProbeInfo() { + if (!EnableDebugInfoProbe) return; + for (StringMap<DebugInfoProbe*>::iterator I = Probes.begin(), + E = Probes.end(); I != E; ++I) { + I->second->report(); + delete I->second; + } + } + +/// initialize - Collect information before running an optimization pass. +void DebugInfoProbeInfo::initialize(Pass *P, Function &F) { + if (!EnableDebugInfoProbe) return; + if (P->getAsPMDataManager()) + return; + + StringMapEntry<DebugInfoProbe *> &Entry = + Probes.GetOrCreateValue(P->getPassName()); + DebugInfoProbe *&Probe = Entry.getValue(); + if (!Probe) + Probe = new DebugInfoProbe(); + Probe->initialize(P->getPassName(), F); +} + +/// finalize - Collect information after running an optimization pass. This +/// must be used after initialization. +void DebugInfoProbeInfo::finalize(Pass *P, Function &F) { + if (!EnableDebugInfoProbe) return; + if (P->getAsPMDataManager()) + return; + StringMapEntry<DebugInfoProbe *> &Entry = + Probes.GetOrCreateValue(P->getPassName()); + DebugInfoProbe *&Probe = Entry.getValue(); + assert (Probe && "DebugInfoProbe is not initialized!"); + Probe->finalize(F); +} diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp index f8b45ee..520333c 100644 --- a/lib/VMCore/DebugLoc.cpp +++ b/lib/VMCore/DebugLoc.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DebugLoc.h" +#include "llvm/ADT/DenseMapInfo.h" #include "LLVMContextImpl.h" using namespace llvm; @@ -108,7 +109,7 @@ MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const { ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()), Scope, IA }; - return MDNode::get(Ctx2, &Elts[0], 4); + return MDNode::get(Ctx2, Elts); } /// getFromDILocation - Translate the DILocation quad into a DebugLoc. 
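The DebugLoc.cpp hunk that follows adds a DenseMapInfo specialization so that a DebugLoc can serve directly as a DenseMap key. A minimal usage sketch, where Counts and I are hypothetical:

    DenseMap<DebugLoc, unsigned> Counts; // relies on the new specialization
    Counts[I->getDebugLoc()]++;          // count instructions per location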
@@ -128,6 +129,29 @@ DebugLoc DebugLoc::getFromDILocation(MDNode *N) { } //===----------------------------------------------------------------------===// +// DenseMap specialization +//===----------------------------------------------------------------------===// + +DebugLoc DenseMapInfo<DebugLoc>::getEmptyKey() { + return DebugLoc::getEmptyKey(); +} + +DebugLoc DenseMapInfo<DebugLoc>::getTombstoneKey() { + return DebugLoc::getTombstoneKey(); +} + +unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) { + FoldingSetNodeID ID; + ID.AddInteger(Key.LineCol); + ID.AddInteger(Key.ScopeIdx); + return ID.ComputeHash(); +} + +bool DenseMapInfo<DebugLoc>::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) { + return LHS == RHS; +} + +//===----------------------------------------------------------------------===// // LLVMContextImpl Implementation //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index c374b06..08b845e 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -68,9 +68,8 @@ void DominatorTree::verifyAnalysis() const { DominatorTree OtherDT; OtherDT.getBase().recalculate(F); if (compare(OtherDT)) { - errs() << "DominatorTree is not up to date! Computed:\n"; + errs() << "DominatorTree is not up to date!\nComputed:\n"; print(errs()); - errs() << "\nActual:\n"; OtherDT.print(errs()); abort(); diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 00d1d78..013c458 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -328,7 +328,7 @@ unsigned Function::getIntrinsicID() const { std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) { assert(id < num_intrinsics && "Invalid intrinsic ID!"); - const char * const Table[] = { + static const char * const Table[] = { "not_intrinsic", #define GET_INTRINSIC_NAME_TABLE #include "llvm/Intrinsics.gen" @@ -363,7 +363,7 @@ const FunctionType *Intrinsic::getType(LLVMContext &Context, } bool Intrinsic::isOverloaded(ID id) { - const bool OTable[] = { + static const bool OTable[] = { false, #define GET_INTRINSIC_OVERLOAD_TABLE #include "llvm/Intrinsics.gen" diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 595dea4..2149155 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -23,13 +23,14 @@ using namespace llvm; /// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. -Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) { +Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) { Constant *StrConstant = ConstantArray::get(Context, Str, true); Module &M = *BB->getParent()->getParent(); GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), true, GlobalValue::InternalLinkage, StrConstant, "", 0, false); GV->setName(Name); + GV->setUnnamedAddr(true); return GV; } diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index d129028..61da9b6 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -131,26 +131,15 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) { return Removed; } -/// resizeOperands - resize operands - This adjusts the length of the operands -/// list according to the following behavior: -/// 1. If NumOps == 0, grow the operand list in response to a push_back style -/// of operation. 
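The type factories below now take ArrayRef<const Type*>; existing std::vector callers keep compiling through ArrayRef's implicit conversion. A sketch, with Ctx hypothetical:

    std::vector<const Type*> Params(1, Type::getInt32Ty(Ctx));
    // The vector converts implicitly to ArrayRef<const Type*>.
    FunctionType *FT = FunctionType::get(Type::getVoidTy(Ctx), Params, false);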
This grows the number of ops by 1.5 times. -/// 2. If NumOps > NumOperands, reserve space for NumOps operands. -/// 3. If NumOps == NumOperands, trim the reserved space. +/// growOperands - grow operands - This grows the operand list in response +/// to a push_back style of operation. This grows the number of ops by 1.5 +/// times. /// -void PHINode::resizeOperands(unsigned NumOps) { +void PHINode::growOperands() { unsigned e = getNumOperands(); - if (NumOps == 0) { - NumOps = e*3/2; - if (NumOps < 4) NumOps = 4; // 4 op PHI nodes are VERY common. - } else if (NumOps*2 > NumOperands) { - // No resize needed. - if (ReservedSpace >= NumOps) return; - } else if (NumOps == NumOperands) { - if (ReservedSpace == NumOps) return; - } else { - return; - } + // Multiply by 1.5 and round down so the result is still even. + unsigned NumOps = e + e / 4 * 2; + if (NumOps < 4) NumOps = 4; // 4 op PHI nodes are VERY common. ReservedSpace = NumOps; Use *OldOps = OperandList; @@ -2297,8 +2286,12 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { if (const VectorType *SrcPTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector return DestPTy->getBitWidth() == SrcPTy->getBitWidth(); - } else { // Casting from something else - return DestPTy->getBitWidth() == SrcBits; + } else if (DestPTy->getBitWidth() == SrcBits) { + return true; // float/int -> vector + } else if (SrcTy->isX86_MMXTy()) { + return DestPTy->getBitWidth() == 64; // MMX to 64-bit vector + } else { + return false; } } else if (DestTy->isPointerTy()) { // Casting to pointer if (SrcTy->isPointerTy()) { // Casting from pointer @@ -2308,8 +2301,12 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { } else { // Casting from something else return false; } - } else if (DestTy->isX86_MMXTy()) { - return SrcBits == 64; + } else if (DestTy->isX86_MMXTy()) { + if (const VectorType *SrcPTy = dyn_cast<VectorType>(SrcTy)) { + return SrcPTy->getBitWidth() == 64; // 64-bit vector to MMX + } else { + return false; + } } else { // Casting to something else return false; } @@ -2990,7 +2987,7 @@ SwitchInst::~SwitchInst() { void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) { unsigned OpNo = NumOperands; if (OpNo+2 > ReservedSpace) - resizeOperands(0); // Get more space! + growOperands(); // Get more space! // Initialize some new operands. assert(OpNo+1 < ReservedSpace && "Growing didn't work!"); NumOperands = OpNo+2; @@ -3021,25 +3018,12 @@ void SwitchInst::removeCase(unsigned idx) { NumOperands = NumOps-2; } -/// resizeOperands - resize operands - This adjusts the length of the operands -/// list according to the following behavior: -/// 1. If NumOps == 0, grow the operand list in response to a push_back style -/// of operation. This grows the number of ops by 3 times. -/// 2. If NumOps > NumOperands, reserve space for NumOps operands. -/// 3. If NumOps == NumOperands, trim the reserved space. +/// growOperands - grow operands - This grows the operand list in response +/// to a push_back style of operation. This grows the number of ops by 3 times. /// -void SwitchInst::resizeOperands(unsigned NumOps) { +void SwitchInst::growOperands() { unsigned e = getNumOperands(); - if (NumOps == 0) { - NumOps = e*3; - } else if (NumOps*2 > NumOperands) { - // No resize needed. 
- if (ReservedSpace >= NumOps) return; - } else if (NumOps == NumOperands) { - if (ReservedSpace == NumOps) return; - } else { - return; - } + unsigned NumOps = e*3; ReservedSpace = NumOps; Use *NewOps = allocHungoffUses(NumOps); @@ -3077,25 +3061,12 @@ void IndirectBrInst::init(Value *Address, unsigned NumDests) { } -/// resizeOperands - resize operands - This adjusts the length of the operands -/// list according to the following behavior: -/// 1. If NumOps == 0, grow the operand list in response to a push_back style -/// of operation. This grows the number of ops by 2 times. -/// 2. If NumOps > NumOperands, reserve space for NumOps operands. -/// 3. If NumOps == NumOperands, trim the reserved space. +/// growOperands - grow operands - This grows the operand list in response +/// to a push_back style of operation. This grows the number of ops by 2 times. /// -void IndirectBrInst::resizeOperands(unsigned NumOps) { +void IndirectBrInst::growOperands() { unsigned e = getNumOperands(); - if (NumOps == 0) { - NumOps = e*2; - } else if (NumOps*2 > NumOperands) { - // No resize needed. - if (ReservedSpace >= NumOps) return; - } else if (NumOps == NumOperands) { - if (ReservedSpace == NumOps) return; - } else { - return; - } + unsigned NumOps = e*2; ReservedSpace = NumOps; Use *NewOps = allocHungoffUses(NumOps); @@ -3139,7 +3110,7 @@ IndirectBrInst::~IndirectBrInst() { void IndirectBrInst::addDestination(BasicBlock *DestBB) { unsigned OpNo = NumOperands; if (OpNo+1 > ReservedSpace) - resizeOperands(0); // Get more space! + growOperands(); // Get more space! // Initialize some new operands. assert(OpNo < ReservedSpace && "Growing didn't work!"); NumOperands = OpNo+1; diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 23971aa..6ea4b48 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -184,7 +184,7 @@ public: // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions // for types as they are needed. Because resolution of types must invalidate - // all of the abstract type descriptions, we keep them in a seperate map to + // all of the abstract type descriptions, we keep them in a separate map to // make this easy. TypePrinting ConcreteTypeDescriptions; TypePrinting AbstractTypeDescriptions; diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 0b8e8df..eb719e5 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -84,18 +84,18 @@ static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) { return reinterpret_cast<MDNodeOperand*>(N+1)+Op; } -MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, - bool isFunctionLocal) +MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal) : Value(Type::getMetadataTy(C), Value::MDNodeVal) { - NumOperands = NumVals; + NumOperands = Vals.size(); if (isFunctionLocal) setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit); // Initialize the operand list, which is co-allocated on the end of the node. 
+ unsigned i = 0; for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands; - Op != E; ++Op, ++Vals) - new (Op) MDNodeOperand(*Vals, this); + Op != E; ++Op, ++i) + new (Op) MDNodeOperand(Vals[i], this); } @@ -183,9 +183,8 @@ static bool isFunctionLocalValue(Value *V) { (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal()); } -MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, - unsigned NumVals, FunctionLocalness FL, - bool Insert) { +MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals, + FunctionLocalness FL, bool Insert) { LLVMContextImpl *pImpl = Context.pImpl; // Add all the operand pointers. Note that we don't have to add the @@ -193,7 +192,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, // Note that if the operands are later nulled out, the node will be // removed from the uniquing map. FoldingSetNodeID ID; - for (unsigned i = 0; i != NumVals; ++i) + for (unsigned i = 0; i != Vals.size(); ++i) ID.AddPointer(Vals[i]); void *InsertPoint; @@ -205,7 +204,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, bool isFunctionLocal = false; switch (FL) { case FL_Unknown: - for (unsigned i = 0; i != NumVals; ++i) { + for (unsigned i = 0; i != Vals.size(); ++i) { Value *V = Vals[i]; if (!V) continue; if (isFunctionLocalValue(V)) { @@ -223,8 +222,8 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, } // Coallocate space for the node and Operands together, then placement new. - void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); - N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); + void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand)); + N = new (Ptr) MDNode(Context, Vals, isFunctionLocal); // InsertPoint will have been set by the FindNodeOrInsertPos call. pImpl->MDNodeSet.InsertNode(N, InsertPoint); @@ -232,24 +231,24 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, return N; } -MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { - return getMDNode(Context, Vals, NumVals, FL_Unknown); +MDNode *MDNode::get(LLVMContext &Context, ArrayRef<Value*> Vals) { + return getMDNode(Context, Vals, FL_Unknown); } -MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals, - unsigned NumVals, bool isFunctionLocal) { - return getMDNode(Context, Vals, NumVals, isFunctionLocal ? FL_Yes : FL_No); +MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, + ArrayRef<Value*> Vals, + bool isFunctionLocal) { + return getMDNode(Context, Vals, isFunctionLocal ? 
FL_Yes : FL_No); } -MDNode *MDNode::getIfExists(LLVMContext &Context, Value *const *Vals, - unsigned NumVals) { - return getMDNode(Context, Vals, NumVals, FL_Unknown, false); +MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Value*> Vals) { + return getMDNode(Context, Vals, FL_Unknown, false); } -MDNode *MDNode::getTemporary(LLVMContext &Context, Value *const *Vals, - unsigned NumVals) { - MDNode *N = (MDNode *)malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); - N = new (N) MDNode(Context, Vals, NumVals, FL_No); +MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef<Value*> Vals) { + MDNode *N = + (MDNode *)malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand)); + N = new (N) MDNode(Context, Vals, FL_No); N->setValueSubclassData(N->getSubclassDataFromValue() | NotUniquedBit); LeakDetector::addGarbageObject(N); diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 8bfef98..637fa79 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -14,6 +14,7 @@ #include "llvm/PassManagers.h" #include "llvm/PassManager.h" +#include "llvm/DebugInfoProbe.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" @@ -25,6 +26,7 @@ #include "llvm/Support/PassNameParser.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" +#include "llvm/ADT/StringMap.h" #include <algorithm> #include <cstdio> #include <map> @@ -63,11 +65,13 @@ PassOptionList; // Print IR out before/after specified passes. static PassOptionList PrintBefore("print-before", - llvm::cl::desc("Print IR before specified passes")); + llvm::cl::desc("Print IR before specified passes"), + cl::Hidden); static PassOptionList PrintAfter("print-after", - llvm::cl::desc("Print IR after specified passes")); + llvm::cl::desc("Print IR after specified passes"), + cl::Hidden); static cl::opt<bool> PrintBeforeAll("print-before-all", @@ -440,6 +444,20 @@ char PassManagerImpl::ID = 0; namespace { //===----------------------------------------------------------------------===// +// DebugInfoProbe + +static DebugInfoProbeInfo *TheDebugProbe; +static void createDebugInfoProbe() { + if (TheDebugProbe) return; + + // Constructed the first time this is called. This guarantees that the + // object will be constructed, if -enable-debug-info-probe is set, + // before static globals, thus it will be destroyed before them. + static ManagedStatic<DebugInfoProbeInfo> DIP; + TheDebugProbe = &*DIP; +} + +//===----------------------------------------------------------------------===// /// TimingInfo Class - This class is used to calculate information about the /// amount of time each pass takes to execute. This only happens when /// -time-passes is enabled on the command line. @@ -964,7 +982,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { // Keep track of higher level analysis used by this manager. HigherLevelAnalysis.push_back(PRequired); } else - llvm_unreachable("Unable to accomodate Required Pass"); + llvm_unreachable("Unable to accommodate Required Pass"); } // Set P as P's last user until someone starts using P. 
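As the PassManager.cpp hunk above shows, the IR-printing options are now cl::Hidden: they no longer appear in -help output but still work when spelled out, e.g. a hypothetical invocation:

    opt -O2 -print-before=simplifycfg in.bc -o out.bc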
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
index c97a170..fa92620 100644
--- a/lib/VMCore/PassRegistry.cpp
+++ b/lib/VMCore/PassRegistry.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 
 // FIXME: We use ManagedStatic to erase the pass registrar on shutdown.
 // Unfortunately, passes are registered with static ctors, and having
-// llvm_shutdown clear this map prevents successful ressurection after
+// llvm_shutdown clear this map prevents successful resurrection after
 // llvm_shutdown is run. Ideally we should find a solution so that we don't
 // leak the map, AND can still resurrect after shutdown.
 static ManagedStatic<PassRegistry> PassRegistryObj;
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index be28ad1..b15304c 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Assembly/Writer.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SCCIterator.h"
@@ -460,7 +461,7 @@ bool FunctionType::isValidArgumentType(const Type *ArgTy) {
 }
 
 FunctionType::FunctionType(const Type *Result,
-                           const std::vector<const Type*> &Params,
+                           ArrayRef<const Type*> Params,
                            bool IsVarArgs)
   : DerivedType(Result->getContext(), FunctionTyID), isVarArgs(IsVarArgs) {
   ContainedTys = reinterpret_cast<PATypeHandle*>(this+1);
@@ -483,7 +484,7 @@ FunctionType::FunctionType(const Type *Result,
 }
 
 StructType::StructType(LLVMContext &C,
-                       const std::vector<const Type*> &Types, bool isPacked)
+                       ArrayRef<const Type*> Types, bool isPacked)
   : CompositeType(C, StructTyID) {
   ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
   NumContainedTys = Types.size();
@@ -838,7 +839,7 @@ FunctionValType FunctionValType::get(const FunctionType *FT) {
 
 // FunctionType::get - The factory function for the FunctionType class...
 FunctionType *FunctionType::get(const Type *ReturnType,
-                                const std::vector<const Type*> &Params,
+                                ArrayRef<const Type*> Params,
                                 bool isVarArg) {
   FunctionValType VT(ReturnType, Params, isVarArg);
   FunctionType *FT = 0;
@@ -915,7 +916,7 @@ bool VectorType::isValidElementType(const Type *ElemTy) {
 
 //
 StructType *StructType::get(LLVMContext &Context,
-                            const std::vector<const Type*> &ETypes,
+                            ArrayRef<const Type*> ETypes,
                             bool isPacked) {
   StructValType STV(ETypes, isPacked);
   StructType *ST = 0;
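With the Type.cpp factories now taking ArrayRef<const Type*>, callers no longer have to build a std::vector just to describe a parameter or element list. A short sketch of a post-change call site, assuming an existing LLVMContext Ctx:

    // A stack array binds to ArrayRef<const Type*> implicitly.
    const Type *Params[] = { Type::getInt32Ty(Ctx), Type::getInt8PtrTy(Ctx) };
    FunctionType *FTy =
      FunctionType::get(Type::getVoidTy(Ctx), Params, /*isVarArg=*/false);
    StructType *STy = StructType::get(Ctx, Params, /*isPacked=*/false);

Existing std::vector-based callers continue to compile, since ArrayRef converts from a vector as well; the .vec() calls in TypesContext.h below copy back into the vectors those classes store.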
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
index 4694486..ad09478 100644
--- a/lib/VMCore/TypesContext.h
+++ b/lib/VMCore/TypesContext.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_TYPESCONTEXT_H
 #define LLVM_TYPESCONTEXT_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include <map>
 
@@ -157,8 +158,8 @@ class StructValType {
   std::vector<const Type*> ElTypes;
   bool packed;
 public:
-  StructValType(const std::vector<const Type*> &args, bool isPacked)
-    : ElTypes(args), packed(isPacked) {}
+  StructValType(ArrayRef<const Type*> args, bool isPacked)
+    : ElTypes(args.vec()), packed(isPacked) {}
 
   static StructValType get(const StructType *ST) {
     std::vector<const Type *> ElTypes;
@@ -187,8 +188,8 @@ class FunctionValType {
   std::vector<const Type*> ArgTypes;
   bool isVarArg;
 public:
-  FunctionValType(const Type *ret, const std::vector<const Type*> &args,
-                  bool isVA) : RetTy(ret), ArgTypes(args), isVarArg(isVA) {}
+  FunctionValType(const Type *ret, ArrayRef<const Type*> args, bool isVA)
+    : RetTy(ret), ArgTypes(args.vec()), isVarArg(isVA) {}
 
   static FunctionValType get(const FunctionType *FT);
 
@@ -369,7 +370,7 @@ public:
 
     // Remove the old entry form TypesByHash.  If the hash values differ
     // now, remove it from the old place.  Otherwise, continue scanning
-    // withing this hashcode to reduce work.
+    // within this hashcode to reduce work.
     if (NewTypeHash != OldTypeHash) {
       RemoveFromTypesByHash(OldTypeHash, Ty);
     } else {
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 58ec6fe..8b89110 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -471,6 +471,23 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) {
             "invalid linkage type for global declaration", &GV);
   }
 
+  if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
+                       GV.getName() == "llvm.global_dtors")) {
+    Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+            "invalid linkage for intrinsic global variable", &GV);
+    // Don't worry about emitting an error for it not being an array,
+    // visitGlobalValue will complain on appending non-array.
+    if (const ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) {
+      const StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+      const PointerType *FuncPtrTy =
+          FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
+      Assert1(STy && STy->getNumElements() == 2 &&
+              STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+              STy->getTypeAtIndex(1) == FuncPtrTy,
+              "wrong type for intrinsic global variable", &GV);
+    }
+  }
+
   visitGlobalValue(GV);
 }
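The new visitGlobalVariable() check pins down the expected shape of llvm.global_ctors and llvm.global_dtors: an appending-linkage array of { i32, void ()* } (priority, function) pairs. A sketch of building that element type by hand, assuming an LLVMContext Ctx (variable names are illustrative):

    FunctionType *VoidFnTy =
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
    const Type *Fields[] = { Type::getInt32Ty(Ctx), VoidFnTy->getPointerTo() };
    StructType *EltTy = StructType::get(Ctx, Fields, /*isPacked=*/false);
    ArrayType *CtorsTy = ArrayType::get(EltTy, /*NumElements=*/1);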
@@ -826,30 +843,10 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
     Assert2(N == 0,
             "Found return instr that returns non-void in Function of void "
             "return type!", &RI, F->getReturnType());
-  else if (N == 1 && F->getReturnType() == RI.getOperand(0)->getType()) {
-    // Exactly one return value and it matches the return type. Good.
-  } else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
-    // The return type is a struct; check for multiple return values.
-    Assert2(STy->getNumElements() == N,
-            "Incorrect number of return values in ret instruction!",
-            &RI, F->getReturnType());
-    for (unsigned i = 0; i != N; ++i)
-      Assert2(STy->getElementType(i) == RI.getOperand(i)->getType(),
-              "Function return type does not match operand "
-              "type of return inst!", &RI, F->getReturnType());
-  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(F->getReturnType())) {
-    // The return type is an array; check for multiple return values.
-    Assert2(ATy->getNumElements() == N,
-            "Incorrect number of return values in ret instruction!",
-            &RI, F->getReturnType());
-    for (unsigned i = 0; i != N; ++i)
-      Assert2(ATy->getElementType() == RI.getOperand(i)->getType(),
-              "Function return type does not match operand "
-              "type of return inst!", &RI, F->getReturnType());
-  } else {
-    CheckFailed("Function return type does not match operand "
-                "type of return inst!", &RI, F->getReturnType());
-  }
+  else
+    Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+            "Function return type does not match operand "
+            "type of return inst!", &RI, F->getReturnType());
 
   // Check to make sure that the return value has necessary properties for
   // terminators...
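The visitReturnInst() simplification reflects that multi-operand ret is gone from the IR: a function returning an aggregate returns it as a single first-class value. A sketch of the form the verifier still accepts, assuming an IRBuilder<> B positioned in a function whose return type is a two-element struct RetTy, with X and Y of the element types (all hypothetical):

    Value *Agg = UndefValue::get(RetTy);
    Agg = B.CreateInsertValue(Agg, X, 0);  // element 0
    Agg = B.CreateInsertValue(Agg, Y, 1);  // element 1
    B.CreateRet(Agg);  // exactly one operand, matching F->getReturnType()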