diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2009-10-23 14:19:52 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2009-10-23 14:19:52 +0000 |
commit | 9643cca39fb9fb3b49a8912926de98acf882283c (patch) | |
tree | 22cc59e4b240d84c3a5a60531119c4eca914a256 /lib | |
parent | 1adacceba9c9ee0f16e54388e56c9a249b296f75 (diff) | |
download | FreeBSD-src-9643cca39fb9fb3b49a8912926de98acf882283c.zip FreeBSD-src-9643cca39fb9fb3b49a8912926de98acf882283c.tar.gz |
Update LLVM to r84949.
Diffstat (limited to 'lib')
182 files changed, 6893 insertions, 3684 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index c5be616..756ffea 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -40,10 +40,6 @@ using namespace llvm; // Useful predicates //===----------------------------------------------------------------------===// -static const GEPOperator *isGEP(const Value *V) { - return dyn_cast<GEPOperator>(V); -} - static const Value *GetGEPOperands(const Value *V, SmallVector<Value*, 16> &GEPOps) { assert(GEPOps.empty() && "Expect empty list to populate!"); @@ -53,7 +49,7 @@ static const Value *GetGEPOperands(const Value *V, // Accumulate all of the chained indexes into the operand array V = cast<User>(V)->getOperand(0); - while (const User *G = isGEP(V)) { + while (const GEPOperator *G = dyn_cast<GEPOperator>(V)) { if (!isa<Constant>(GEPOps[0]) || isa<GlobalValue>(GEPOps[0]) || !cast<Constant>(GEPOps[0])->isNullValue()) break; // Don't handle folding arbitrary pointer offsets yet... @@ -222,7 +218,7 @@ namespace { private: // VisitedPHIs - Track PHI nodes visited by a aliasCheck() call. - SmallSet<const PHINode*, 16> VisitedPHIs; + SmallPtrSet<const PHINode*, 16> VisitedPHIs; // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction // against another. @@ -358,11 +354,13 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { if (alias(II->getOperand(2), PtrSize, P, Size) == NoAlias) return NoModRef; } + break; case Intrinsic::invariant_end: { unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue(); if (alias(II->getOperand(3), PtrSize, P, Size) == NoAlias) return NoModRef; } + break; } } } @@ -400,7 +398,7 @@ BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size, // Note that we also handle chains of getelementptr instructions as well as // constant expression getelementptrs here. // - if (isGEP(V1) && isGEP(V2)) { + if (isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) { const User *GEP1 = cast<User>(V1); const User *GEP2 = cast<User>(V2); @@ -419,13 +417,13 @@ BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size, // Drill down into the first non-gep value, to test for must-aliasing of // the base pointers. - while (isGEP(GEP1->getOperand(0)) && + while (isa<GEPOperator>(GEP1->getOperand(0)) && GEP1->getOperand(1) == Constant::getNullValue(GEP1->getOperand(1)->getType())) GEP1 = cast<User>(GEP1->getOperand(0)); const Value *BasePtr1 = GEP1->getOperand(0); - while (isGEP(GEP2->getOperand(0)) && + while (isa<GEPOperator>(GEP2->getOperand(0)) && GEP2->getOperand(1) == Constant::getNullValue(GEP2->getOperand(1)->getType())) GEP2 = cast<User>(GEP2->getOperand(0)); @@ -531,7 +529,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, if (!VisitedPHIs.insert(PN)) return MayAlias; - SmallSet<Value*, 4> UniqueSrc; + SmallPtrSet<Value*, 4> UniqueSrc; SmallVector<Value*, 4> V1Srcs; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *PV1 = PN->getIncomingValue(i); @@ -555,7 +553,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, // NoAlias / MustAlias. Otherwise, returns MayAlias. for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { Value *V = V1Srcs[i]; - AliasResult ThisAlias = aliasCheck(V, PNSize, V2, V2Size); + AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize); if (ThisAlias != Alias || ThisAlias == MayAlias) return MayAlias; } @@ -617,11 +615,11 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, isNonEscapingLocalObject(O1) && O1 != O2) return NoAlias; - if (!isGEP(V1) && isGEP(V2)) { + if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) { std::swap(V1, V2); std::swap(V1Size, V2Size); } - if (isGEP(V1)) + if (isa<GEPOperator>(V1)) return aliasGEP(V1, V1Size, V2, V2Size); if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 6fed400..08f070c 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -17,71 +17,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" #include "llvm/Analysis/CFGPrinter.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Support/CFG.h" + +#include "llvm/Pass.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/GraphWriter.h" using namespace llvm; -namespace llvm { -template<> -struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits { - static std::string getGraphName(const Function *F) { - return "CFG for '" + F->getNameStr() + "' function"; - } - - static std::string getNodeLabel(const BasicBlock *Node, - const Function *Graph, - bool ShortNames) { - if (ShortNames && !Node->getName().empty()) - return Node->getNameStr() + ":"; - - std::string Str; - raw_string_ostream OS(Str); - - if (ShortNames) { - WriteAsOperand(OS, Node, false); - return OS.str(); - } - - if (Node->getName().empty()) { - WriteAsOperand(OS, Node, false); - OS << ":"; - } - - OS << *Node; - std::string OutStr = OS.str(); - if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); - - // Process string output to make it nicer... - for (unsigned i = 0; i != OutStr.length(); ++i) - if (OutStr[i] == '\n') { // Left justify - OutStr[i] = '\\'; - OutStr.insert(OutStr.begin()+i+1, 'l'); - } else if (OutStr[i] == ';') { // Delete comments! - unsigned Idx = OutStr.find('\n', i+1); // Find end of line - OutStr.erase(OutStr.begin()+i, OutStr.begin()+Idx); - --i; - } - - return OutStr; - } - - static std::string getEdgeSourceLabel(const BasicBlock *Node, - succ_const_iterator I) { - // Label source of conditional branches with "T" or "F" - if (const BranchInst *BI = dyn_cast<BranchInst>(Node->getTerminator())) - if (BI->isConditional()) - return (I == succ_begin(Node)) ? "T" : "F"; - return ""; - } -}; -} - namespace { struct VISIBILITY_HIDDEN CFGViewer : public FunctionPass { static char ID; // Pass identifcation, replacement for typeid diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 1d2f118..d4be986 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_library(LLVMAnalysis ConstantFolding.cpp DbgInfoPrinter.cpp DebugInfo.cpp + DomPrinter.cpp IVUsers.cpp InlineCost.cpp InstCount.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 0ce1c24..214caeb 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -24,9 +24,10 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Target/TargetData.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" @@ -92,6 +93,265 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return false; } +/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the +/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the +/// pointer to copy results into and BytesLeft is the number of bytes left in +/// the CurPtr buffer. TD is the target data. +static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, + unsigned char *CurPtr, unsigned BytesLeft, + const TargetData &TD) { + assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) && + "Out of range access"); + + if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) + return true; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { + if (CI->getBitWidth() > 64 || + (CI->getBitWidth() & 7) != 0) + return false; + + uint64_t Val = CI->getZExtValue(); + unsigned IntBytes = unsigned(CI->getBitWidth()/8); + + for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { + CurPtr[i] = (unsigned char)(Val >> ByteOffset * 8); + ++ByteOffset; + } + return true; + } + + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + if (CFP->getType()->isDoubleTy()) { + C = ConstantExpr::getBitCast(C, Type::getInt64Ty(C->getContext())); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + if (CFP->getType()->isFloatTy()){ + C = ConstantExpr::getBitCast(C, Type::getInt32Ty(C->getContext())); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + } + + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { + const StructLayout *SL = TD.getStructLayout(CS->getType()); + unsigned Index = SL->getElementContainingOffset(ByteOffset); + uint64_t CurEltOffset = SL->getElementOffset(Index); + ByteOffset -= CurEltOffset; + + while (1) { + // If the element access is to the element itself and not to tail padding, + // read the bytes from the element. + uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType()); + + if (ByteOffset < EltSize && + !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, + BytesLeft, TD)) + return false; + + ++Index; + + // Check to see if we read from the last struct element, if so we're done. + if (Index == CS->getType()->getNumElements()) + return true; + + // If we read all of the bytes we needed from this element we're done. + uint64_t NextEltOffset = SL->getElementOffset(Index); + + if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset) + return true; + + // Move to the next element of the struct. + BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset; + ByteOffset = 0; + CurEltOffset = NextEltOffset; + } + // not reached. + } + + if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { + uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType()); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CA->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + + if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) { + uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType()); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CV->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + + // Otherwise, unknown initializer type. + return false; +} + +static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, + const TargetData &TD) { + const Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); + const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); + + // If this isn't an integer load we can't fold it directly. + if (!IntType) { + // If this is a float/double load, we can try folding it as an int32/64 load + // and then bitcast the result. This can be useful for union cases. Note + // that address spaces don't matter here since we're not going to result in + // an actual new load. + const Type *MapTy; + if (LoadTy->isFloatTy()) + MapTy = Type::getInt32PtrTy(C->getContext()); + else if (LoadTy->isDoubleTy()) + MapTy = Type::getInt64PtrTy(C->getContext()); + else if (isa<VectorType>(LoadTy)) { + MapTy = IntegerType::get(C->getContext(), + TD.getTypeAllocSizeInBits(LoadTy)); + MapTy = PointerType::getUnqual(MapTy); + } else + return 0; + + C = ConstantExpr::getBitCast(C, MapTy); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) + return ConstantExpr::getBitCast(Res, LoadTy); + return 0; + } + + unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; + if (BytesLoaded > 32 || BytesLoaded == 0) return 0; + + GlobalValue *GVal; + int64_t Offset; + if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) + return 0; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal); + if (!GV || !GV->isConstant() || !GV->hasInitializer() || + !GV->hasDefinitiveInitializer() || + !GV->getInitializer()->getType()->isSized()) + return 0; + + // If we're loading off the beginning of the global, some bytes may be valid, + // but we don't try to handle this. + if (Offset < 0) return 0; + + // If we're not accessing anything in this constant, the result is undefined. + if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + return UndefValue::get(IntType); + + unsigned char RawBytes[32] = {0}; + if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + BytesLoaded, TD)) + return 0; + + APInt ResultVal(IntType->getBitWidth(), 0); + for (unsigned i = 0; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1-i]); + } + + return ConstantInt::get(IntType->getContext(), ResultVal); +} + +/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would +/// produce if it is constant and determinable. If this is not determinable, +/// return null. +Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, + const TargetData *TD) { + // First, try the easy cases: + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + + // If the loaded value isn't a constant expr, we can't handle it. + ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) return 0; + + if (CE->getOpcode() == Instruction::GetElementPtr) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (Constant *V = + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) + return V; + } + + // Instead of loading constant c string, use corresponding integer value + // directly if string length is small enough. + std::string Str; + if (TD && GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) { + unsigned StrLen = Str.length(); + const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); + unsigned NumBits = Ty->getPrimitiveSizeInBits(); + // Replace LI with immediate integer store. + if ((NumBits >> 3) == StrLen + 1) { + APInt StrVal(NumBits, 0); + APInt SingleChar(NumBits, 0); + if (TD->isLittleEndian()) { + for (signed i = StrLen-1; i >= 0; i--) { + SingleChar = (uint64_t) Str[i] & UCHAR_MAX; + StrVal = (StrVal << 8) | SingleChar; + } + } else { + for (unsigned i = 0; i < StrLen; i++) { + SingleChar = (uint64_t) Str[i] & UCHAR_MAX; + StrVal = (StrVal << 8) | SingleChar; + } + // Append NULL at the end. + SingleChar = 0; + StrVal = (StrVal << 8) | SingleChar; + } + return ConstantInt::get(CE->getContext(), StrVal); + } + } + + // If this load comes from anywhere in a constant global, and if the global + // is all undef or zero, we know what it loads. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getUnderlyingObject())){ + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { + const Type *ResTy = cast<PointerType>(C->getType())->getElementType(); + if (GV->getInitializer()->isNullValue()) + return Constant::getNullValue(ResTy); + if (isa<UndefValue>(GV->getInitializer())) + return UndefValue::get(ResTy); + } + } + + // Try hard to fold loads from bitcasted strange and non-type-safe things. We + // currently don't do any of this for big endian systems. It can be + // generalized in the future if someone is interested. + if (TD && TD->isLittleEndian()) + return FoldReinterpretLoadFromConstPtr(CE, *TD); + return 0; +} + +static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){ + if (LI->isVolatile()) return 0; + + if (Constant *C = dyn_cast<Constant>(LI->getOperand(0))) + return ConstantFoldLoadFromConstPtr(C, TD); + + return 0; +} /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging @@ -380,6 +640,9 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context, Ops.data(), Ops.size(), Context, TD); + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + return ConstantFoldLoadInst(LI, TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops.data(), Ops.size(), Context, TD); } @@ -640,15 +903,15 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, C = UndefValue::get(ATy->getElementType()); else return 0; - } else if (const VectorType *PTy = dyn_cast<VectorType>(*I)) { - if (CI->getZExtValue() >= PTy->getNumElements()) + } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) { + if (CI->getZExtValue() >= VTy->getNumElements()) return 0; if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) C = CP->getOperand(CI->getZExtValue()); else if (isa<ConstantAggregateZero>(C)) - C = Constant::getNullValue(PTy->getElementType()); + C = Constant::getNullValue(VTy->getElementType()); else if (isa<UndefValue>(C)) - C = UndefValue::get(PTy->getElementType()); + C = UndefValue::get(VTy->getElementType()); else return 0; } else { diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp new file mode 100644 index 0000000..f1b44d0 --- /dev/null +++ b/lib/Analysis/DomPrinter.cpp @@ -0,0 +1,265 @@ +//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit +// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the +// program, with a graph of the dominance/postdominance tree of that +// function. +// +// There are also passes available to directly call dotty ('-view-dom' or +// '-view-postdom'). By appending '-only' like '-dot-dom-only' only the +// names of the bbs are printed, but the content is hidden. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DomPrinter.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Analysis/CFGPrinter.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/PostDominators.h" + +using namespace llvm; + +namespace llvm { +template<> +struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits { + static std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph, + bool ShortNames) { + + BasicBlock *BB = Node->getBlock(); + + if (!BB) + return "Post dominance root node"; + + return DOTGraphTraits<const Function*>::getNodeLabel(BB, BB->getParent(), + ShortNames); + } +}; + +template<> +struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> { + + static std::string getGraphName(DominatorTree *DT) { + return "Dominator tree"; + } + + static std::string getNodeLabel(DomTreeNode *Node, + DominatorTree *G, + bool ShortNames) { + return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode(), + ShortNames); + } +}; + +template<> +struct DOTGraphTraits<PostDominatorTree*> + : public DOTGraphTraits<DomTreeNode*> { + static std::string getGraphName(PostDominatorTree *DT) { + return "Post dominator tree"; + } + static std::string getNodeLabel(DomTreeNode *Node, + PostDominatorTree *G, + bool ShortNames) { + return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, + G->getRootNode(), + ShortNames); + } +}; +} + +namespace { +template <class Analysis, bool OnlyBBS> +struct GenericGraphViewer : public FunctionPass { + std::string Name; + + GenericGraphViewer(std::string GraphName, const void *ID) : FunctionPass(ID) { + Name = GraphName; + } + + virtual bool runOnFunction(Function &F) { + Analysis *Graph; + + Graph = &getAnalysis<Analysis>(); + ViewGraph(Graph, Name, OnlyBBS); + + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<Analysis>(); + } +}; + +struct DomViewer + : public GenericGraphViewer<DominatorTree, false> { + static char ID; + DomViewer() : GenericGraphViewer<DominatorTree, false>("dom", &ID){} +}; + +struct DomOnlyViewer + : public GenericGraphViewer<DominatorTree, true> { + static char ID; + DomOnlyViewer() : GenericGraphViewer<DominatorTree, true>("domonly", &ID){} +}; + +struct PostDomViewer + : public GenericGraphViewer<PostDominatorTree, false> { + static char ID; + PostDomViewer() : + GenericGraphViewer<PostDominatorTree, false>("postdom", &ID){} +}; + +struct PostDomOnlyViewer + : public GenericGraphViewer<PostDominatorTree, true> { + static char ID; + PostDomOnlyViewer() : + GenericGraphViewer<PostDominatorTree, true>("postdomonly", &ID){} +}; +} // end anonymous namespace + +char DomViewer::ID = 0; +RegisterPass<DomViewer> A("view-dom", + "View dominance tree of function"); + +char DomOnlyViewer::ID = 0; +RegisterPass<DomOnlyViewer> B("view-dom-only", + "View dominance tree of function " + "(with no function bodies)"); + +char PostDomViewer::ID = 0; +RegisterPass<PostDomViewer> C("view-postdom", + "View postdominance tree of function"); + +char PostDomOnlyViewer::ID = 0; +RegisterPass<PostDomOnlyViewer> D("view-postdom-only", + "View postdominance tree of function " + "(with no function bodies)"); + +namespace { +template <class Analysis, bool OnlyBBS> +struct GenericGraphPrinter : public FunctionPass { + + std::string Name; + + GenericGraphPrinter(std::string GraphName, const void *ID) + : FunctionPass(ID) { + Name = GraphName; + } + + virtual bool runOnFunction(Function &F) { + Analysis *Graph; + std::string Filename = Name + "." + F.getNameStr() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + Graph = &getAnalysis<Analysis>(); + + if (ErrorInfo.empty()) + WriteGraph(File, Graph, OnlyBBS); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<Analysis>(); + } +}; + +struct DomPrinter + : public GenericGraphPrinter<DominatorTree, false> { + static char ID; + DomPrinter() : GenericGraphPrinter<DominatorTree, false>("dom", &ID) {} +}; + +struct DomOnlyPrinter + : public GenericGraphPrinter<DominatorTree, true> { + static char ID; + DomOnlyPrinter() : GenericGraphPrinter<DominatorTree, true>("domonly", &ID) {} +}; + +struct PostDomPrinter + : public GenericGraphPrinter<PostDominatorTree, false> { + static char ID; + PostDomPrinter() : + GenericGraphPrinter<PostDominatorTree, false>("postdom", &ID) {} +}; + +struct PostDomOnlyPrinter + : public GenericGraphPrinter<PostDominatorTree, true> { + static char ID; + PostDomOnlyPrinter() : + GenericGraphPrinter<PostDominatorTree, true>("postdomonly", &ID) {} +}; +} // end anonymous namespace + + + +char DomPrinter::ID = 0; +RegisterPass<DomPrinter> E("dot-dom", + "Print dominance tree of function " + "to 'dot' file"); + +char DomOnlyPrinter::ID = 0; +RegisterPass<DomOnlyPrinter> F("dot-dom-only", + "Print dominance tree of function " + "to 'dot' file " + "(with no function bodies)"); + +char PostDomPrinter::ID = 0; +RegisterPass<PostDomPrinter> G("dot-postdom", + "Print postdominance tree of function " + "to 'dot' file"); + +char PostDomOnlyPrinter::ID = 0; +RegisterPass<PostDomOnlyPrinter> H("dot-postdom-only", + "Print postdominance tree of function " + "to 'dot' file " + "(with no function bodies)"); + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +FunctionPass *llvm::createDomPrinterPass() { + return new DomPrinter(); +} + +FunctionPass *llvm::createDomOnlyPrinterPass() { + return new DomOnlyPrinter(); +} + +FunctionPass *llvm::createDomViewerPass() { + return new DomViewer(); +} + +FunctionPass *llvm::createDomOnlyViewerPass() { + return new DomOnlyViewer(); +} + +FunctionPass *llvm::createPostDomPrinterPass() { + return new PostDomPrinter(); +} + +FunctionPass *llvm::createPostDomOnlyPrinterPass() { + return new PostDomOnlyPrinter(); +} + +FunctionPass *llvm::createPostDomViewerPass() { + return new PostDomViewer(); +} + +FunctionPass *llvm::createPostDomOnlyViewerPass() { + return new PostDomOnlyViewer(); +} diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index f5c1108..7949288 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -303,7 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Check the value being stored. Value *Ptr = SI->getOperand(0)->getUnderlyingObject(); - if (isa<MallocInst>(Ptr) || isMalloc(Ptr)) { + if (isMalloc(Ptr)) { // Okay, easy case. } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) { Function *F = CI->getCalledFunction(); @@ -439,8 +439,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (cast<StoreInst>(*II).isVolatile()) // Treat volatile stores as reading memory somewhere. FunctionEffect |= Ref; - } else if (isa<MallocInst>(*II) || isa<FreeInst>(*II) || - isMalloc(&cast<Instruction>(*II))) { + } else if (isMalloc(&cast<Instruction>(*II)) || isa<FreeInst>(*II)) { FunctionEffect |= ModRef; } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 3b0d2c9..b833baa 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -131,7 +131,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { } // These, too, are calls. - if (isa<MallocInst>(II) || isa<FreeInst>(II)) + if (isa<FreeInst>(II)) NumInsts += InlineConstants::CallPenalty; if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 83724ca..4cde793 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -76,11 +76,11 @@ FunctionPass *llvm::createInstCountPass() { return new InstCount(); } bool InstCount::runOnFunction(Function &F) { unsigned StartMemInsts = NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + - NumInvokeInst + NumAllocaInst + NumMallocInst + NumFreeInst; + NumInvokeInst + NumAllocaInst + NumFreeInst; visit(F); unsigned EndMemInsts = NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + - NumInvokeInst + NumAllocaInst + NumMallocInst + NumFreeInst; + NumInvokeInst + NumAllocaInst + NumFreeInst; TotalMemInst += EndMemInsts-StartMemInsts; return false; } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index ce2d29f..e9256b7 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -292,6 +292,9 @@ bool Loop::isLoopSimplifyForm() const { // Normal-form loops have a single backedge. if (!getLoopLatch()) return false; + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end()); // Each predecessor of each exit block of a normal loop is contained // within the loop. SmallVector<BasicBlock *, 4> ExitBlocks; @@ -299,7 +302,7 @@ bool Loop::isLoopSimplifyForm() const { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) for (pred_iterator PI = pred_begin(ExitBlocks[i]), PE = pred_end(ExitBlocks[i]); PI != PE; ++PI) - if (!contains(*PI)) + if (!LoopBBs.count(*PI)) return false; // All the requirements are met. return true; diff --git a/lib/Analysis/MallocHelper.cpp b/lib/Analysis/MallocHelper.cpp index 89051d1..e7bb41e 100644 --- a/lib/Analysis/MallocHelper.cpp +++ b/lib/Analysis/MallocHelper.cpp @@ -130,9 +130,9 @@ static bool isArrayMallocHelper(const CallInst *CI, LLVMContext &Context, /// matches the malloc call IR generated by CallInst::CreateMalloc(). This /// means that it is a malloc call with one bitcast use AND the malloc call's /// size argument is: -/// 1. a constant not equal to the malloc's allocated type +/// 1. a constant not equal to the size of the malloced type /// or -/// 2. the result of a multiplication by the malloc's allocated type +/// 2. the result of a multiplication by the size of the malloced type /// Otherwise it returns NULL. /// The unique bitcast is needed to determine the type/size of the array /// allocation. @@ -183,25 +183,60 @@ const Type* llvm::getMallocAllocatedType(const CallInst* CI) { return PT ? PT->getElementType() : NULL; } +/// isSafeToGetMallocArraySize - Returns true if the array size of a malloc can +/// be determined. It can be determined in these 3 cases of malloc codegen: +/// 1. non-array malloc: The malloc's size argument is a constant and equals the /// size of the type being malloced. +/// 2. array malloc: This is a malloc call with one bitcast use AND the malloc +/// call's size argument is a constant multiple of the size of the malloced +/// type. +/// 3. array malloc: This is a malloc call with one bitcast use AND the malloc +/// call's size argument is the result of a multiplication by the size of the +/// malloced type. +/// Otherwise returns false. +static bool isSafeToGetMallocArraySize(const CallInst *CI, + LLVMContext &Context, + const TargetData* TD) { + if (!CI) + return false; + + // Type must be known to determine array size. + const Type* T = getMallocAllocatedType(CI); + if (!T) return false; + + Value* MallocArg = CI->getOperand(1); + Constant *ElementSize = ConstantExpr::getSizeOf(T); + ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, + MallocArg->getType()); + + // First, check if it is a non-array malloc. + if (isa<ConstantExpr>(MallocArg) && (MallocArg == ElementSize)) + return true; + + // Second, check if it can be determined that this is an array malloc. + return isArrayMallocHelper(CI, Context, TD); +} + /// isConstantOne - Return true only if val is constant int 1. static bool isConstantOne(Value *val) { return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne(); } -/// getMallocArraySize - Returns the array size of a malloc call. The array -/// size is computated in 1 of 3 ways: -/// 1. If the element type if of size 1, then array size is the argument to +/// getMallocArraySize - Returns the array size of a malloc call. For array +/// mallocs, the size is computated in 1 of 3 ways: +/// 1. If the element type is of size 1, then array size is the argument to /// malloc. /// 2. Else if the malloc's argument is a constant, the array size is that /// argument divided by the element type's size. /// 3. Else the malloc argument must be a multiplication and the array size is /// the first operand of the multiplication. -/// This function returns constant 1 if: -/// 1. The malloc call's allocated type cannot be determined. -/// 2. IR wasn't created by a call to CallInst::CreateMalloc() with a non-NULL -/// ArraySize. +/// For non-array mallocs, the computed size is constant 1. +/// This function returns NULL for all mallocs whose array size cannot be +/// determined. Value* llvm::getMallocArraySize(CallInst* CI, LLVMContext &Context, const TargetData* TD) { + if (!isSafeToGetMallocArraySize(CI, Context, TD)) + return NULL; + // Match CreateMalloc's use of constant 1 array-size for non-array mallocs. if (!isArrayMalloc(CI, Context, TD)) return ConstantInt::get(CI->getOperand(1)->getType(), 1); diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp index 43f4af3..2309fbc 100644 --- a/lib/Analysis/PointerTracking.cpp +++ b/lib/Analysis/PointerTracking.cpp @@ -102,8 +102,9 @@ const SCEV *PointerTracking::computeAllocationCount(Value *P, if (CallInst *CI = extractMallocCall(V)) { Value *arraySize = getMallocArraySize(CI, P->getContext(), TD); - Ty = getMallocAllocatedType(CI); - if (!Ty || !arraySize) return SE->getCouldNotCompute(); + const Type* AllocTy = getMallocAllocatedType(CI); + if (!AllocTy || !arraySize) return SE->getCouldNotCompute(); + Ty = AllocTy; // arraySize elements of type Ty. return SE->getSCEV(arraySize); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index baa347a..dc0d489 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -469,26 +469,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, break; } - case Instruction::Alloca: - case Instruction::Malloc: { + case Instruction::Alloca: { AllocationInst *AI = cast<AllocationInst>(V); unsigned Align = AI->getAlignment(); - if (Align == 0 && TD) { - if (isa<AllocaInst>(AI)) - Align = TD->getABITypeAlignment(AI->getType()->getElementType()); - else if (isa<MallocInst>(AI)) { - // Malloc returns maximally aligned memory. - Align = TD->getABITypeAlignment(AI->getType()->getElementType()); - Align = - std::max(Align, - (unsigned)TD->getABITypeAlignment( - Type::getDoubleTy(V->getContext()))); - Align = - std::max(Align, - (unsigned)TD->getABITypeAlignment( - Type::getInt64Ty(V->getContext()))); - } - } + if (Align == 0 && TD) + Align = TD->getABITypeAlignment(AI->getType()->getElementType()); if (Align > 0) KnownZero = Mask & APInt::getLowBitsSet(BitWidth, diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 0e9f1a0..f6cea88 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -529,7 +529,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(module); KEYWORD(asm); KEYWORD(sideeffect); - KEYWORD(msasm); + KEYWORD(alignstack); KEYWORD(gc); KEYWORD(ccc); @@ -602,6 +602,10 @@ lltok::Kind LLLexer::LexIdentifier() { // Scan CurPtr ahead, seeing if there is just whitespace before the newline. if (JustWhitespaceNewLine(CurPtr)) return lltok::kw_zeroext; + } else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) { + // FIXME: Remove in LLVM 3.0. + // Autoupgrade malloc instruction. + return lltok::kw_malloc; } // Keywords for instructions. @@ -641,7 +645,6 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(unwind, Unwind); INSTKEYWORD(unreachable, Unreachable); - INSTKEYWORD(malloc, Malloc); INSTKEYWORD(alloca, Alloca); INSTKEYWORD(free, Free); INSTKEYWORD(load, Load); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 09bc5f7..271567b 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -69,6 +69,23 @@ bool LLParser::Run() { /// ValidateEndOfModule - Do final validity and sanity checks at the end of the /// module. bool LLParser::ValidateEndOfModule() { + // Update auto-upgraded malloc calls to "malloc". + // FIXME: Remove in LLVM 3.0. + if (MallocF) { + MallocF->setName("malloc"); + // If setName() does not set the name to "malloc", then there is already a + // declaration of "malloc". In that case, iterate over all calls to MallocF + // and get them to call the declared "malloc" instead. + if (MallocF->getName() != "malloc") { + Constant* RealMallocF = M->getFunction("malloc"); + if (RealMallocF->getType() != MallocF->getType()) + RealMallocF = ConstantExpr::getBitCast(RealMallocF, MallocF->getType()); + MallocF->replaceAllUsesWith(RealMallocF); + MallocF->eraseFromParent(); + MallocF = NULL; + } + } + if (!ForwardRefTypes.empty()) return Error(ForwardRefTypes.begin()->second.second, "use of undefined type named '" + @@ -1029,14 +1046,12 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { /// ::= /* empty */ /// ::= !dbg !42 bool LLParser::ParseOptionalCustomMetadata() { - - std::string Name; - if (Lex.getKind() == lltok::NamedOrCustomMD) { - Name = Lex.getStrVal(); - Lex.Lex(); - } else + if (Lex.getKind() != lltok::NamedOrCustomMD) return false; + std::string Name = Lex.getStrVal(); + Lex.Lex(); + if (Lex.getKind() != lltok::Metadata) return TokError("Expected '!' here"); Lex.Lex(); @@ -1047,7 +1062,7 @@ bool LLParser::ParseOptionalCustomMetadata() { MetadataContext &TheMetadata = M->getContext().getMetadata(); unsigned MDK = TheMetadata.getMDKind(Name.c_str()); if (!MDK) - MDK = TheMetadata.RegisterMDKind(Name.c_str()); + MDK = TheMetadata.registerMDKind(Name.c_str()); MDsOnInst.push_back(std::make_pair(MDK, cast<MDNode>(Node))); return false; @@ -1959,17 +1974,17 @@ bool LLParser::ParseValID(ValID &ID) { return false; case lltok::kw_asm: { - // ValID ::= 'asm' SideEffect? MsAsm? STRINGCONSTANT ',' STRINGCONSTANT - bool HasSideEffect, MsAsm; + // ValID ::= 'asm' SideEffect? AlignStack? STRINGCONSTANT ',' STRINGCONSTANT + bool HasSideEffect, AlignStack; Lex.Lex(); if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) || - ParseOptionalToken(lltok::kw_msasm, MsAsm) || + ParseOptionalToken(lltok::kw_alignstack, AlignStack) || ParseStringConstant(ID.StrVal) || ParseToken(lltok::comma, "expected comma in inline asm expression") || ParseToken(lltok::StringConstant, "expected constraint string")) return true; ID.StrVal2 = Lex.getStrVal(); - ID.UIntVal = HasSideEffect | ((unsigned)MsAsm<<1); + ID.UIntVal = HasSideEffect | ((unsigned)AlignStack<<1); ID.Kind = ValID::t_InlineAsm; return false; } @@ -2783,8 +2798,8 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_call: return ParseCall(Inst, PFS, false); case lltok::kw_tail: return ParseCall(Inst, PFS, true); // Memory. - case lltok::kw_alloca: - case lltok::kw_malloc: return ParseAlloc(Inst, PFS, KeywordVal); + case lltok::kw_alloca: return ParseAlloc(Inst, PFS); + case lltok::kw_malloc: return ParseAlloc(Inst, PFS, BB, false); case lltok::kw_free: return ParseFree(Inst, PFS); case lltok::kw_load: return ParseLoad(Inst, PFS, false); case lltok::kw_store: return ParseStore(Inst, PFS, false); @@ -2858,8 +2873,6 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, if (ParseType(Ty, true /*void allowed*/)) return true; if (Ty->isVoidTy()) { - if (EatIfPresent(lltok::comma)) - if (ParseOptionalCustomMetadata()) return true; Inst = ReturnInst::Create(Context); return false; } @@ -2895,8 +2908,6 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, } } } - if (EatIfPresent(lltok::comma)) - if (ParseOptionalCustomMetadata()) return true; Inst = ReturnInst::Create(Context, RV); return false; @@ -3294,7 +3305,7 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) { } /// ParsePHI -/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Valueß ']')* +/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')* bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { PATypeHolder Ty(Type::getVoidTy(Context)); Value *Op0, *Op1; @@ -3315,6 +3326,9 @@ bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { if (!EatIfPresent(lltok::comma)) break; + if (Lex.getKind() == lltok::NamedOrCustomMD) + break; + if (ParseToken(lltok::lsquare, "expected '[' in phi value list") || ParseValue(Ty, Op0, PFS) || ParseToken(lltok::comma, "expected ',' after insertelement value") || @@ -3323,6 +3337,9 @@ bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { return true; } + if (Lex.getKind() == lltok::NamedOrCustomMD) + if (ParseOptionalCustomMetadata()) return true; + if (!Ty->isFirstClassType()) return Error(TypeLoc, "phi node must have first class type"); @@ -3439,7 +3456,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, /// ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalInfo)? /// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)? bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, - unsigned Opc) { + BasicBlock* BB, bool isAlloca) { PATypeHolder Ty(Type::getVoidTy(Context)); Value *Size = 0; LocTy SizeLoc; @@ -3460,10 +3477,20 @@ bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, if (Size && Size->getType() != Type::getInt32Ty(Context)) return Error(SizeLoc, "element count must be i32"); - if (Opc == Instruction::Malloc) - Inst = new MallocInst(Ty, Size, Alignment); - else + if (isAlloca) { Inst = new AllocaInst(Ty, Size, Alignment); + return false; + } + + // Autoupgrade old malloc instruction to malloc call. + // FIXME: Remove in LLVM 3.0. + const Type *IntPtrTy = Type::getInt32Ty(Context); + if (!MallocF) + // Prototype malloc as "void *(int32)". + // This function is renamed as "malloc" in ValidateEndOfModule(). + MallocF = cast<Function>( + M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL)); + Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, Size, MallocF); return false; } diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 97bf2f3..5dd6a2e 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -75,9 +75,11 @@ namespace llvm { std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals; std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs; std::vector<GlobalValue*> NumberedVals; + Function* MallocF; public: LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : - Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m) {} + Context(m->getContext()), Lex(F, SM, Err, m->getContext()), + M(m), MallocF(NULL) {} bool Run(); LLVMContext& getContext() { return Context; } @@ -276,7 +278,8 @@ namespace llvm { bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); bool ParsePHI(Instruction *&I, PerFunctionState &PFS); bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); - bool ParseAlloc(Instruction *&I, PerFunctionState &PFS, unsigned Opc); + bool ParseAlloc(Instruction *&I, PerFunctionState &PFS, + BasicBlock *BB = 0, bool isAlloca = true); bool ParseFree(Instruction *&I, PerFunctionState &PFS); bool ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); bool ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index b3c59ee..f5072fe 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -62,9 +62,8 @@ namespace lltok { kw_module, kw_asm, kw_sideeffect, - kw_msasm, + kw_alignstack, kw_gc, - kw_dbg, kw_c, kw_cc, kw_ccc, kw_fastcc, kw_coldcc, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4eb12c6..3a385cb 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -840,7 +840,15 @@ bool BitcodeReader::ParseMetadata() { for (unsigned i = 1; i != RecordLength; ++i) Name[i-1] = Record[i]; MetadataContext &TheMetadata = Context.getMetadata(); - TheMetadata.MDHandlerNames[Name.str()] = Kind; + unsigned ExistingKind = TheMetadata.getMDKind(Name.str()); + if (ExistingKind == 0) { + unsigned NewKind = TheMetadata.registerMDKind(Name.str()); + assert (Kind == NewKind + && "Unable to handle custom metadata mismatch!"); + } else { + assert (ExistingKind == Kind + && "Unable to handle custom metadata mismatch!"); + } break; } } @@ -1165,7 +1173,7 @@ bool BitcodeReader::ParseConstants() { if (Record.size() < 2) return Error("Invalid INLINEASM record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; - bool IsMsAsm = Record[0] >> 1; + bool IsAlignStack = Record[0] >> 1; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) return Error("Invalid INLINEASM record"); @@ -1179,7 +1187,7 @@ bool BitcodeReader::ParseConstants() { ConstrStr += (char)Record[3+AsmStrSize+i]; const PointerType *PTy = cast<PointerType>(CurTy); V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()), - AsmStr, ConstrStr, HasSideEffects, IsMsAsm); + AsmStr, ConstrStr, HasSideEffects, IsAlignStack); break; } } @@ -2044,14 +2052,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align] + // Autoupgrade malloc instruction to malloc call. + // FIXME: Remove in LLVM 3.0. if (Record.size() < 3) return Error("Invalid MALLOC record"); const PointerType *Ty = dyn_cast_or_null<PointerType>(getTypeByID(Record[0])); Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context)); - unsigned Align = Record[2]; if (!Ty || !Size) return Error("Invalid MALLOC record"); - I = new MallocInst(Ty->getElementType(), Size, (1 << Align) >> 1); + if (!CurBB) return Error("Invalid malloc instruction with no BB"); + const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext()); + I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(), + Size, NULL); InstructionList.push_back(I); break; } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 12a1f5e..037854e 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -517,9 +517,7 @@ static void WriteModuleMetadata(const ValueEnumerator &VE, } // Code: [strchar x N] - const char *StrBegin = MDS->begin(); - for (unsigned i = 0, e = MDS->length(); i != e; ++i) - Record.push_back(StrBegin[i]); + Record.append(MDS->begin(), MDS->end()); // Emit the finished record. Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev); @@ -563,22 +561,22 @@ static void WriteMetadataAttachment(const Function &F, // Write metadata attachments // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] MetadataContext &TheMetadata = F.getContext().getMetadata(); + typedef SmallVector<std::pair<unsigned, TrackingVH<MDNode> >, 2> MDMapTy; + MDMapTy MDs; for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - const MetadataContext::MDMapTy *P = TheMetadata.getMDs(I); - if (!P) continue; + MDs.clear(); + TheMetadata.getMDs(I, MDs); bool RecordedInstruction = false; - for (MetadataContext::MDMapTy::const_iterator PI = P->begin(), - PE = P->end(); PI != PE; ++PI) { - if (MDNode *ND = dyn_cast_or_null<MDNode>(PI->second)) { - if (RecordedInstruction == false) { - Record.push_back(VE.getInstructionID(I)); - RecordedInstruction = true; - } - Record.push_back(PI->first); - Record.push_back(VE.getValueID(ND)); + for (MDMapTy::const_iterator PI = MDs.begin(), PE = MDs.end(); + PI != PE; ++PI) { + if (RecordedInstruction == false) { + Record.push_back(VE.getInstructionID(I)); + RecordedInstruction = true; } + Record.push_back(PI->first); + Record.push_back(VE.getValueID(PI->second)); } if (!Record.empty()) { if (!StartedMetadataBlock) { @@ -604,11 +602,13 @@ static void WriteModuleMetadataStore(const Module *M, // Write metadata kinds // METADATA_KIND - [n x [id, name]] MetadataContext &TheMetadata = M->getContext().getMetadata(); - const StringMap<unsigned> *Kinds = TheMetadata.getHandlerNames(); - for (StringMap<unsigned>::const_iterator - I = Kinds->begin(), E = Kinds->end(); I != E; ++I) { - Record.push_back(I->second); - StringRef KName = I->first(); + SmallVector<std::pair<unsigned, StringRef>, 4> Names; + TheMetadata.getHandlerNames(Names); + for (SmallVector<std::pair<unsigned, StringRef>, 4>::iterator + I = Names.begin(), + E = Names.end(); I != E; ++I) { + Record.push_back(I->first); + StringRef KName = I->second; for (unsigned i = 0, e = KName.size(); i != e; ++i) Record.push_back(KName[i]); if (!StartedMetadataBlock) { @@ -680,7 +680,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { Record.push_back(unsigned(IA->hasSideEffects()) | - unsigned(IA->isMsAsm()) << 1); + unsigned(IA->isAlignStack()) << 1); // Add the asm string. const std::string &AsmStr = IA->getAsmString(); @@ -1054,13 +1054,6 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(VE.getValueID(I.getOperand(i))); break; - case Instruction::Malloc: - Code = bitc::FUNC_CODE_INST_MALLOC; - Vals.push_back(VE.getTypeID(I.getType())); - Vals.push_back(VE.getValueID(I.getOperand(0))); // size. - Vals.push_back(Log2_32(cast<MallocInst>(I).getAlignment())+1); - break; - case Instruction::Free: Code = bitc::FUNC_CODE_INST_FREE; PushValueAndType(I.getOperand(0), InstID, Vals, VE); diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 60253ad..85aa5fa 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -87,6 +87,8 @@ ValueEnumerator::ValueEnumerator(const Module *M) { EnumerateType(I->getType()); MetadataContext &TheMetadata = F->getContext().getMetadata(); + typedef SmallVector<std::pair<unsigned, TrackingVH<MDNode> >, 2> MDMapTy; + MDMapTy MDs; for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){ for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); @@ -99,12 +101,11 @@ ValueEnumerator::ValueEnumerator(const Module *M) { EnumerateAttributes(II->getAttributes()); // Enumerate metadata attached with this instruction. - const MetadataContext::MDMapTy *MDs = TheMetadata.getMDs(I); - if (MDs) - for (MetadataContext::MDMapTy::const_iterator MI = MDs->begin(), - ME = MDs->end(); MI != ME; ++MI) - if (MDNode *MDN = dyn_cast_or_null<MDNode>(MI->second)) - EnumerateMetadata(MDN); + MDs.clear(); + TheMetadata.getMDs(I, MDs); + for (MDMapTy::const_iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; + ++MI) + EnumerateMetadata(MI->second); } } @@ -214,10 +215,9 @@ void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) { MDValues.push_back(std::make_pair(MD, 1U)); MDValueMap[MD] = MDValues.size(); MDValueID = MDValues.size(); - for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end(); - I != E; ++I) { - if (*I) - EnumerateValue(*I); + for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) { + if (Value *V = N->getElement(i)) + EnumerateValue(V); else EnumerateType(Type::getVoidTy(MD->getContext())); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 2914851..d50e5e3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -923,7 +923,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, AddType(DW_Unit, &Buffer, FromTy); // Add name if not anonymous or intermediate type. - if (Name) + if (Name && Tag != dwarf::DW_TAG_pointer_type) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add size if non-zero (derived types might be zero-sized.) @@ -1811,10 +1811,18 @@ void DwarfDebug::CollectVariableInfo() { DIVariable DV (Var); if (DV.isNull()) continue; unsigned VSlot = VI->second; + DbgScope *Scope = NULL; DenseMap<MDNode *, DbgScope *>::iterator DSI = DbgScopeMap.find(DV.getContext().getNode()); - assert (DSI != DbgScopeMap.end() && "Unable to find variable scope!"); - DbgScope *Scope = DSI->second; + if (DSI != DbgScopeMap.end()) + Scope = DSI->second; + else + // There is not any instruction assocated with this scope, so get + // a new scope. + Scope = getDbgScope(DV.getContext().getNode(), + NULL /* Not an instruction */, + NULL /* Not inlined */); + assert (Scope && "Unable to find variable scope!"); Scope->AddVariable(new DbgVariable(DV, VSlot, false)); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 626523b..6c03b55 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -871,28 +871,7 @@ void DwarfException::EmitExceptionTable() { Asm->EOL("Next action"); } - // Emit the Catch Clauses. The code for the catch clauses following the same - // try is similar to a switch statement. The catch clause action record - // informs the runtime about the type of a catch clause and about the - // associated switch value. - // - // Action Record Fields: - // - // * Filter Value - // Positive value, starting at 1. Index in the types table of the - // __typeinfo for the catch-clause type. 1 is the first word preceding - // TTBase, 2 is the second word, and so on. Used by the runtime to check - // if the thrown exception type matches the catch-clause type. Back-end - // generated switch statements check against this value. - // - // * Next - // Signed offset, in bytes from the start of this field, to the next - // chained action record, or zero if none. - // - // The order of the action records determined by the next field is the order - // of the catch clauses as they appear in the source code, and must be kept in - // the same order. As a result, changing the order of the catch clause would - // change the semantics of the program. + // Emit the Catch TypeInfos. for (std::vector<GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; @@ -907,12 +886,15 @@ void DwarfException::EmitExceptionTable() { Asm->EOL("TypeInfo"); } - // Emit the Type Table. + // Emit the Exception Specifications. for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { unsigned TypeID = *I; Asm->EmitULEB128Bytes(TypeID); - Asm->EOL("Filter TypeInfo index"); + if (TypeID != 0) + Asm->EOL("Exception specification"); + else + Asm->EOL(); } Asm->EmitAlignment(2, 0, 0, false); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index f9abeac..66c5aa5 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -945,15 +945,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } } - // If this block doesn't fall through (e.g. it ends with an uncond branch or - // has no successors) and if the pred falls through into this block, and if - // it would otherwise fall through into the block after this, move this - // block to the end of the function. + // If this block has no successors (e.g. it is a return block or ends with + // a call to a no-return function like abort or __cxa_throw) and if the pred + // falls through into this block, and if it would otherwise fall through + // into the block after this, move this block to the end of the function. // // We consider it more likely that execution will stay in the function (e.g. // due to loops) than it is to exit it. This asserts in loops etc, moving // the assert condition out of the loop body. - if (!PriorCond.empty() && PriorFBB == 0 && + if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 && MachineFunction::iterator(PriorTBB) == FallThrough && !CanFallThrough(MBB)) { bool DoTransform = true; diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 42b24a6..6fff12c 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -34,14 +34,6 @@ namespace { const TargetInstrInfo *TII; const TargetLowering *TLI; - /// ChangedMBBs - BBs which are modified by OptimizeIntraLoopEdges. - SmallPtrSet<MachineBasicBlock*, 8> ChangedMBBs; - - /// UncondJmpMBBs - A list of BBs which are in loops and end with - /// unconditional branches. - SmallVector<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 4> - UncondJmpMBBs; - public: static char ID; CodePlacementOpt() : MachineFunctionPass(&ID) {} @@ -58,7 +50,19 @@ namespace { } private: - bool OptimizeIntraLoopEdges(); + bool HasFallthrough(MachineBasicBlock *MBB); + bool HasAnalyzableTerminator(MachineBasicBlock *MBB); + void Splice(MachineFunction &MF, + MachineFunction::iterator InsertPt, + MachineFunction::iterator Begin, + MachineFunction::iterator End); + void UpdateTerminator(MachineBasicBlock *MBB); + bool EliminateUnconditionalJumpsToTop(MachineFunction &MF, + MachineLoop *L); + bool MoveDiscontiguousLoopBlocks(MachineFunction &MF, + MachineLoop *L); + bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L); + bool OptimizeIntraLoopEdges(MachineFunction &MF); bool AlignLoops(MachineFunction &MF); bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align); }; @@ -70,168 +74,354 @@ FunctionPass *llvm::createCodePlacementOptPass() { return new CodePlacementOpt(); } -/// OptimizeBackEdges - Place loop back edges to move unconditional branches -/// out of the loop. -/// -/// A: -/// ... -/// <fallthrough to B> +/// HasFallthrough - Test whether the given branch has a fallthrough, either as +/// a plain fallthrough or as a fallthrough case of a conditional branch. /// -/// B: --> loop header -/// ... -/// jcc <cond> C, [exit] -/// -/// C: -/// ... -/// jmp B +bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) + return false; + // This conditional branch has no fallthrough. + if (FBB) + return false; + // An unconditional branch has no fallthrough. + if (Cond.empty() && TBB) + return false; + // It has a fallthrough. + return true; +} + +/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB. +/// This is called before major changes are begun to test whether it will be +/// possible to complete the changes. /// -/// ==> +/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed +/// whenever possible. /// -/// A: -/// ... -/// jmp B +bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { + // Conservatively ignore EH landing pads. + if (MBB->isLandingPad()) return false; + + // Ignore blocks which look like they might have EH-related control flow. + // At the time of this writing, there are blocks which AnalyzeBranch + // thinks end in single uncoditional branches, yet which have two CFG + // successors. Code in this file is not prepared to reason about such things. + if (!MBB->empty() && MBB->back().getOpcode() == TargetInstrInfo::EH_LABEL) + return false; + + // Aggressively handle return blocks and similar constructs. + if (MBB->succ_empty()) return true; + + // Ask the target's AnalyzeBranch if it can handle this block. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + // Make the the terminator is understood. + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) + return false; + // Make sure we have the option of reversing the condition. + if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) + return false; + return true; +} + +/// Splice - Move the sequence of instructions [Begin,End) to just before +/// InsertPt. Update branch instructions as needed to account for broken +/// fallthrough edges and to take advantage of newly exposed fallthrough +/// opportunities. /// -/// C: -/// ... -/// <fallthough to B> -/// -/// B: --> loop header -/// ... -/// jcc <cond> C, [exit] +void CodePlacementOpt::Splice(MachineFunction &MF, + MachineFunction::iterator InsertPt, + MachineFunction::iterator Begin, + MachineFunction::iterator End) { + assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() && + "Splice can't change the entry block!"); + MachineFunction::iterator OldBeginPrior = prior(Begin); + MachineFunction::iterator OldEndPrior = prior(End); + + MF.splice(InsertPt, Begin, End); + + UpdateTerminator(prior(Begin)); + UpdateTerminator(OldBeginPrior); + UpdateTerminator(OldEndPrior); +} + +/// UpdateTerminator - Update the terminator instructions in MBB to account +/// for changes to the layout. If the block previously used a fallthrough, +/// it may now need a branch, and if it previously used branching it may now +/// be able to use a fallthrough. /// -bool CodePlacementOpt::OptimizeIntraLoopEdges() { - if (!TLI->shouldOptimizeCodePlacement()) - return false; +void CodePlacementOpt::UpdateTerminator(MachineBasicBlock *MBB) { + // A block with no successors has no concerns with fall-through edges. + if (MBB->succ_empty()) return; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + bool B = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); + (void) B; + assert(!B && "UpdateTerminators requires analyzable predecessors!"); + if (Cond.empty()) { + if (TBB) { + // The block has an unconditional branch. If its successor is now + // its layout successor, delete the branch. + if (MBB->isLayoutSuccessor(TBB)) + TII->RemoveBranch(*MBB); + } else { + // The block has an unconditional fallthrough. If its successor is not + // its layout successor, insert a branch. + TBB = *MBB->succ_begin(); + if (!MBB->isLayoutSuccessor(TBB)) + TII->InsertBranch(*MBB, TBB, 0, Cond); + } + } else { + if (FBB) { + // The block has a non-fallthrough conditional branch. If one of its + // successors is its layout successor, rewrite it to a fallthrough + // conditional branch. + if (MBB->isLayoutSuccessor(TBB)) { + TII->RemoveBranch(*MBB); + TII->ReverseBranchCondition(Cond); + TII->InsertBranch(*MBB, FBB, 0, Cond); + } else if (MBB->isLayoutSuccessor(FBB)) { + TII->RemoveBranch(*MBB); + TII->InsertBranch(*MBB, TBB, 0, Cond); + } + } else { + // The block has a fallthrough conditional branch. + MachineBasicBlock *MBBA = *MBB->succ_begin(); + MachineBasicBlock *MBBB = *next(MBB->succ_begin()); + if (MBBA == TBB) std::swap(MBBB, MBBA); + if (MBB->isLayoutSuccessor(TBB)) { + TII->RemoveBranch(*MBB); + TII->ReverseBranchCondition(Cond); + TII->InsertBranch(*MBB, MBBA, 0, Cond); + } else if (!MBB->isLayoutSuccessor(MBBA)) { + TII->RemoveBranch(*MBB); + TII->InsertBranch(*MBB, TBB, MBBA, Cond); + } + } + } +} +/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump +/// to the loop top to the top of the loop so that they have a fall through. +/// This can introduce a branch on entry to the loop, but it can eliminate a +/// branch within the loop. See the @simple case in +/// test/CodeGen/X86/loop_blocks.ll for an example of this. +bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, + MachineLoop *L) { bool Changed = false; - for (unsigned i = 0, e = UncondJmpMBBs.size(); i != e; ++i) { - MachineBasicBlock *MBB = UncondJmpMBBs[i].first; - MachineBasicBlock *SuccMBB = UncondJmpMBBs[i].second; - MachineLoop *L = MLI->getLoopFor(MBB); - assert(L && "BB is expected to be in a loop!"); - - if (ChangedMBBs.count(MBB)) { - // BB has been modified, re-analyze. - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty()) + MachineBasicBlock *TopMBB = L->getTopBlock(); + + bool BotHasFallthrough = HasFallthrough(L->getBottomBlock()); + + if (TopMBB == MF.begin() || + HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) { + new_top: + for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(), + PE = TopMBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + if (Pred == TopMBB) continue; + if (HasFallthrough(Pred)) continue; + if (!L->contains(Pred)) continue; + + // Verify that we can analyze all the loop entry edges before beginning + // any changes which will require us to be able to analyze them. + if (Pred == MF.begin()) continue; - if (MLI->getLoopFor(TBB) != L || TBB->isLandingPad()) + if (!HasAnalyzableTerminator(Pred)) + continue; + if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred)))) continue; - SuccMBB = TBB; - } else { - assert(MLI->getLoopFor(SuccMBB) == L && - "Successor is not in the same loop!"); - } - if (MBB->isLayoutSuccessor(SuccMBB)) { - // Successor is right after MBB, just eliminate the unconditional jmp. - // Can this happen? - TII->RemoveBranch(*MBB); - ChangedMBBs.insert(MBB); - ++NumIntraElim; + // Move the block. Changed = true; - continue; - } - // Now check if the predecessor is fallthrough from any BB. If there is, - // that BB should be from outside the loop since edge will become a jmp. - bool OkToMove = true; - MachineBasicBlock *FtMBB = 0, *FtTBB = 0, *FtFBB = 0; - SmallVector<MachineOperand, 4> FtCond; - for (MachineBasicBlock::pred_iterator PI = SuccMBB->pred_begin(), - PE = SuccMBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *PredMBB = *PI; - if (PredMBB->isLayoutSuccessor(SuccMBB)) { - if (TII->AnalyzeBranch(*PredMBB, FtTBB, FtFBB, FtCond)) { - OkToMove = false; + // Move it and all the blocks that can reach it via fallthrough edges + // exclusively, to keep existing fallthrough edges intact. + MachineFunction::iterator Begin = Pred; + MachineFunction::iterator End = next(Begin); + while (Begin != MF.begin()) { + MachineFunction::iterator Prior = prior(Begin); + if (Prior == MF.begin()) + break; + // Stop when a non-fallthrough edge is found. + if (!HasFallthrough(Prior)) + break; + // Stop if a block which could fall-through out of the loop is found. + if (Prior->isSuccessor(End)) + break; + // If we've reached the top, stop scanning. + if (Prior == MachineFunction::iterator(TopMBB)) { + // We know top currently has a fall through (because we just checked + // it) which would be lost if we do the transformation, so it isn't + // worthwhile to do the transformation unless it would expose a new + // fallthrough edge. + if (!Prior->isSuccessor(End)) + goto next_pred; + // Otherwise we can stop scanning and procede to move the blocks. break; } - if (!FtTBB) - FtTBB = SuccMBB; - else if (!FtFBB) { - assert(FtFBB != SuccMBB && "Unexpected control flow!"); - FtFBB = SuccMBB; - } - - // A fallthrough. - FtMBB = PredMBB; - MachineLoop *PL = MLI->getLoopFor(PredMBB); - if (PL && (PL == L || PL->getLoopDepth() >= L->getLoopDepth())) - OkToMove = false; - - break; + // If we hit a switch or something complicated, don't move anything + // for this predecessor. + if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior)))) + break; + // Ok, the block prior to Begin will be moved along with the rest. + // Extend the range to include it. + Begin = Prior; + ++NumIntraMoved; } + + // Move the blocks. + Splice(MF, TopMBB, Begin, End); + + // Update TopMBB. + TopMBB = L->getTopBlock(); + + // We have a new loop top. Iterate on it. We shouldn't have to do this + // too many times if BranchFolding has done a reasonable job. + goto new_top; + next_pred:; } + } + + // If the loop previously didn't exit with a fall-through and it now does, + // we eliminated a branch. + if (Changed && + !BotHasFallthrough && + HasFallthrough(L->getBottomBlock())) { + ++NumIntraElim; + BotHasFallthrough = true; + } + + return Changed; +} + +/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the +/// portion of the loop contiguous with the header. This usually makes the loop +/// contiguous, provided that AnalyzeBranch can handle all the relevant +/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll +/// for an example of this. +bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, + MachineLoop *L) { + bool Changed = false; + MachineBasicBlock *TopMBB = L->getTopBlock(); + MachineBasicBlock *BotMBB = L->getBottomBlock(); + + // Determine a position to move orphaned loop blocks to. If TopMBB is not + // entered via fallthrough and BotMBB is exited via fallthrough, prepend them + // to the top of the loop to avoid loosing that fallthrough. Otherwise append + // them to the bottom, even if it previously had a fallthrough, on the theory + // that it's worth an extra branch to keep the loop contiguous. + MachineFunction::iterator InsertPt = next(MachineFunction::iterator(BotMBB)); + bool InsertAtTop = false; + if (TopMBB != MF.begin() && + !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) && + HasFallthrough(BotMBB)) { + InsertPt = TopMBB; + InsertAtTop = true; + } + + // Keep a record of which blocks are in the portion of the loop contiguous + // with the loop header. + SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks; + for (MachineFunction::iterator I = TopMBB, + E = next(MachineFunction::iterator(BotMBB)); I != E; ++I) + ContiguousBlocks.insert(I); + + // Find non-contigous blocks and fix them. + if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt))) + for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end(); + BI != BE; ++BI) { + MachineBasicBlock *BB = *BI; + + // Verify that we can analyze all the loop entry edges before beginning + // any changes which will require us to be able to analyze them. + if (!HasAnalyzableTerminator(BB)) + continue; + if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB)))) + continue; + + // If the layout predecessor is part of the loop, this block will be + // processed along with it. This keeps them in their relative order. + if (BB != MF.begin() && + L->contains(prior(MachineFunction::iterator(BB)))) + continue; - if (!OkToMove) - continue; - - // Is it profitable? If SuccMBB can fallthrough itself, that can be changed - // into a jmp. - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*SuccMBB, TBB, FBB, Cond)) - continue; - if (!TBB && Cond.empty()) - TBB = next(MachineFunction::iterator(SuccMBB)); - else if (!FBB && !Cond.empty()) - FBB = next(MachineFunction::iterator(SuccMBB)); - - // This calculate the cost of the transformation. Also, it finds the *only* - // intra-loop edge if there is one. - int Cost = 0; - bool HasOneIntraSucc = true; - MachineBasicBlock *IntraSucc = 0; - for (MachineBasicBlock::succ_iterator SI = SuccMBB->succ_begin(), - SE = SuccMBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SSMBB = *SI; - if (MLI->getLoopFor(SSMBB) == L) { - if (!IntraSucc) - IntraSucc = SSMBB; - else - HasOneIntraSucc = false; + // Check to see if this block is already contiguous with the main + // portion of the loop. + if (!ContiguousBlocks.insert(BB)) + continue; + + // Move the block. + Changed = true; + + // Process this block and all loop blocks contiguous with it, to keep + // them in their relative order. + MachineFunction::iterator Begin = BB; + MachineFunction::iterator End = next(MachineFunction::iterator(BB)); + for (; End != MF.end(); ++End) { + if (!L->contains(End)) break; + if (!HasAnalyzableTerminator(End)) break; + ContiguousBlocks.insert(End); + ++NumIntraMoved; } - if (SuccMBB->isLayoutSuccessor(SSMBB)) - // This will become a jmp. - ++Cost; - else if (MBB->isLayoutSuccessor(SSMBB)) { - // One of the successor will become the new fallthrough. - if (SSMBB == FBB) { - FBB = 0; - --Cost; - } else if (!FBB && SSMBB == TBB && Cond.empty()) { - TBB = 0; - --Cost; - } else if (!Cond.empty() && !TII->ReverseBranchCondition(Cond)) { - assert(SSMBB == TBB); - TBB = FBB; - FBB = 0; - --Cost; + // If we're inserting at the bottom of the loop, and the code we're + // moving originally had fall-through successors, bring the sucessors + // up with the loop blocks to preserve the fall-through edges. + if (!InsertAtTop) + for (; End != MF.end(); ++End) { + if (L->contains(End)) break; + if (!HasAnalyzableTerminator(End)) break; + if (!HasFallthrough(prior(End))) break; } - } - } - if (Cost) - continue; - - // Now, let's move the successor to below the BB to eliminate the jmp. - SuccMBB->moveAfter(MBB); - TII->RemoveBranch(*MBB); - TII->RemoveBranch(*SuccMBB); - if (TBB) - TII->InsertBranch(*SuccMBB, TBB, FBB, Cond); - ChangedMBBs.insert(MBB); - ChangedMBBs.insert(SuccMBB); - if (FtMBB) { - TII->RemoveBranch(*FtMBB); - TII->InsertBranch(*FtMBB, FtTBB, FtFBB, FtCond); - ChangedMBBs.insert(FtMBB); + + // Move the blocks. This may invalidate TopMBB and/or BotMBB, but + // we don't need them anymore at this point. + Splice(MF, InsertPt, Begin, End); } - Changed = true; - } - ++NumIntraMoved; + return Changed; +} + +/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize +/// intra-loop branching and to form contiguous loops. +/// +/// This code takes the approach of making minor changes to the existing +/// layout to fix specific loop-oriented problems. Also, it depends on +/// AnalyzeBranch, which can't understand complex control instructions. +/// +bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, + MachineLoop *L) { + bool Changed = false; + + // Do optimization for nested loops. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); + + // Do optimization for this loop. + Changed |= EliminateUnconditionalJumpsToTop(MF, L); + Changed |= MoveDiscontiguousLoopBlocks(MF, L); + + return Changed; +} + +/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize +/// intra-loop branching and to form contiguous loops. +/// +bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { + bool Changed = false; + + if (!TLI->shouldOptimizeCodePlacement()) + return Changed; + + // Do optimization for each loop in the function. + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); + I != E; ++I) + if (!(*I)->getParentLoop()) + Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); + return Changed; } @@ -255,6 +445,8 @@ bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { return Changed; } +/// AlignLoop - Align loop headers to target preferred alignments. +/// bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align) { bool Changed = false; @@ -263,17 +455,7 @@ bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L, for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) Changed |= AlignLoop(MF, *I, Align); - MachineBasicBlock *TopMBB = L->getHeader(); - if (TopMBB == MF.begin()) return Changed; - - MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(TopMBB)); - while (MLI->getLoopFor(PredMBB) == L) { - TopMBB = PredMBB; - if (TopMBB == MF.begin()) return Changed; - PredMBB = prior(MachineFunction::iterator(TopMBB)); - } - - TopMBB->setAlignment(Align); + L->getTopBlock()->setAlignment(Align); Changed = true; ++NumLoopsAligned; @@ -288,30 +470,9 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); - // Analyze the BBs first and keep track of BBs that - // end with an unconditional jmp to another block in the same loop. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; - if (MBB->isLandingPad()) - continue; - MachineLoop *L = MLI->getLoopFor(MBB); - if (!L) - continue; - - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty()) - continue; - if (MLI->getLoopFor(TBB) == L && !TBB->isLandingPad()) - UncondJmpMBBs.push_back(std::make_pair(MBB, TBB)); - } - - bool Changed = OptimizeIntraLoopEdges(); + bool Changed = OptimizeIntraLoopEdges(MF); Changed |= AlignLoops(MF); - ChangedMBBs.clear(); - UncondJmpMBBs.clear(); - return Changed; } diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 4f32c2b..f35d196 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -22,7 +22,8 @@ using namespace llvm; -ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData) : +ExactHazardRecognizer:: +ExactHazardRecognizer(const InstrItineraryData &LItinData) : ScheduleHazardRecognizer(), ItinData(LItinData) { // Determine the maximum depth of any itinerary. This determines the diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 4e713a6..e58a9ca 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -323,7 +323,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Second pass scheduler. if (OptLevel != CodeGenOpt::None) { - PM.add(createPostRAScheduler()); + PM.add(createPostRAScheduler(OptLevel)); printAndVerify(PM); } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 93d3d4c..79f46f3 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -2603,7 +2602,19 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, tri_->isSuperRegister(*AS, SpillReg)); bool Cut = false; - LiveInterval &pli = getInterval(SpillReg); + SmallVector<unsigned, 4> PRegs; + if (hasInterval(SpillReg)) + PRegs.push_back(SpillReg); + else { + SmallSet<unsigned, 4> Added; + for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) + if (Added.insert(*AS) && hasInterval(*AS)) { + PRegs.push_back(*AS); + for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS) + Added.insert(*ASS); + } + } + SmallPtrSet<MachineInstr*, 8> SeenMIs; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), E = mri_->reg_end(); I != E; ++I) { @@ -2613,8 +2624,12 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, continue; SeenMIs.insert(MI); LiveIndex Index = getInstructionIndex(MI); - if (pli.liveAt(Index)) { - vrm.addEmergencySpill(SpillReg, MI); + for (unsigned i = 0, e = PRegs.size(); i != e; ++i) { + unsigned PReg = PRegs[i]; + LiveInterval &pli = getInterval(PReg); + if (!pli.liveAt(Index)) + continue; + vrm.addEmergencySpill(PReg, MI); LiveIndex StartIdx = getLoadIndex(Index); LiveIndex EndIdx = getNextSlot(getStoreIndex(Index)); if (pli.isInOneLiveRange(StartIdx, EndIdx)) { @@ -2626,12 +2641,12 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; + << "constraints:\n"; MI->print(Msg, tm_); } llvm_report_error(Msg.str()); } - for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) { + for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) { if (!hasInterval(*AS)) continue; LiveInterval &spli = getInterval(*AS); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index b9d3ba7..1f85e92 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -220,8 +220,10 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "imp-"; OS << "def"; NeedComma = true; - } else if (isImplicit()) + } else if (isImplicit()) { OS << "imp-use"; + NeedComma = true; + } if (isKill() || isDead() || isUndef()) { if (NeedComma) OS << ','; diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 2da8e37..db77d19 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -43,3 +43,31 @@ void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } + +MachineBasicBlock *MachineLoop::getTopBlock() { + MachineBasicBlock *TopMBB = getHeader(); + MachineFunction::iterator Begin = TopMBB->getParent()->begin(); + if (TopMBB != Begin) { + MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB)); + while (contains(PriorMBB)) { + TopMBB = PriorMBB; + if (TopMBB == Begin) break; + PriorMBB = prior(MachineFunction::iterator(TopMBB)); + } + } + return TopMBB; +} + +MachineBasicBlock *MachineLoop::getBottomBlock() { + MachineBasicBlock *BotMBB = getHeader(); + MachineFunction::iterator End = BotMBB->getParent()->end(); + if (BotMBB != prior(End)) { + MachineBasicBlock *NextMBB = next(MachineFunction::iterator(BotMBB)); + while (contains(NextMBB)) { + BotMBB = NextMBB; + if (BotMBB == next(MachineFunction::iterator(BotMBB))) break; + NextMBB = next(MachineFunction::iterator(BotMBB)); + } + } + return BotMBB; +} diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 0f3b33f..a00bebb 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -34,10 +34,8 @@ STATISTIC(NumSunk, "Number of machine instructions sunk"); namespace { class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass { - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineFunction *CurMF; // Current MachineFunction MachineRegisterInfo *RegInfo; // Machine register information MachineDominatorTree *DT; // Machine dominator tree AliasAnalysis *AA; @@ -92,19 +90,16 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, return true; } - - bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "******** Machine Sinking ********\n"); - CurMF = &MF; - TM = &CurMF->getTarget(); - TII = TM->getInstrInfo(); - TRI = TM->getRegisterInfo(); - RegInfo = &CurMF->getRegInfo(); + const TargetMachine &TM = MF.getTarget(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + RegInfo = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); AA = &getAnalysis<AliasAnalysis>(); - AllocatableSet = TRI->getAllocatableSet(*CurMF); + AllocatableSet = TRI->getAllocatableSet(MF); bool EverMadeChange = false; @@ -112,7 +107,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; // Process all basic blocks. - for (MachineFunction::iterator I = CurMF->begin(), E = CurMF->end(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); @@ -256,7 +251,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (SuccToSinkTo->isLandingPad()) return false; - // If is not possible to sink an instruction into its own block. This can + // It is not possible to sink an instruction into its own block. This can // happen with loops. if (MI->getParent() == SuccToSinkTo) return false; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index e52158c..8fdbe9b 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -78,10 +78,12 @@ DebugMod("postra-sched-debugmod", namespace { class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; public: static char ID; - PostRAScheduler() : MachineFunctionPass(&ID) {} + PostRAScheduler(CodeGenOpt::Level ol) : + MachineFunctionPass(&ID), OptLevel(ol) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -126,6 +128,9 @@ namespace { /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; + /// AntiDepMode - Anti-dependence breaking mode + TargetSubtarget::AntiDepBreakMode AntiDepMode; + /// Classes - For live regs that are only used in one register class in a /// live range, the register class. If the register is not live, the /// corresponding value is null. If the register is live but used in @@ -154,10 +159,11 @@ namespace { const MachineLoopInfo &MLI, const MachineDominatorTree &MDT, ScheduleHazardRecognizer *HR, - AliasAnalysis *aa) + AliasAnalysis *aa, + TargetSubtarget::AntiDepBreakMode adm) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AllocatableSet(TRI->getAllocatableSet(MF)), - HazardRec(HR), AA(aa) {} + HazardRec(HR), AA(aa), AntiDepMode(adm) {} ~SchedulePostRATDList() { delete HazardRec; @@ -232,14 +238,21 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { AA = &getAnalysis<AliasAnalysis>(); // Check for explicit enable/disable of post-ra scheduling. + TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) - return true; + return false; } else { - // Check that post-RA scheduling is enabled for this function + // Check that post-RA scheduling is enabled for this target. const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); - if (!ST.enablePostRAScheduler()) - return true; + if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode)) + return false; + } + + // Check for antidep breaking override... + if (EnableAntiDepBreaking.getPosition() > 0) { + AntiDepMode = (EnableAntiDepBreaking) ? + TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE; } DEBUG(errs() << "PostRAScheduler\n"); @@ -251,7 +264,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA, AntiDepMode); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -391,7 +404,7 @@ void SchedulePostRATDList::Schedule() { // Build the scheduling graph. BuildSchedGraph(AA); - if (EnableAntiDepBreaking) { + if (AntiDepMode != TargetSubtarget::ANTIDEP_NONE) { if (BreakAntiDependencies()) { // We made changes. Update the dependency graph. // Theoretically we could update the graph in place: @@ -1195,6 +1208,6 @@ void SchedulePostRATDList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass *llvm::createPostRAScheduler() { - return new PostRAScheduler(); +FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) { + return new PostRAScheduler(OptLevel); } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 8fa07d4..726869a 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -952,7 +952,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, if (I != IntervalSSMap.end()) { SS = I->second; } else { - SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); } MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 7af0bba..a0860a1 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -259,7 +259,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); + FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { @@ -729,10 +729,12 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { /// the instruciton range. Return the operand number of the kill in Operand. static MachineBasicBlock::iterator findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, - unsigned Reg, unsigned *Operand) { + unsigned Reg) { // Scan forward to find the last use of this virtual register for (++I; I != ME; ++I) { MachineInstr *MI = I; + bool isDefInsn = false; + bool isKillInsn = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) if (MI->getOperand(i).isReg()) { unsigned OpReg = MI->getOperand(i).getReg(); @@ -740,13 +742,14 @@ findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, continue; assert (OpReg == Reg && "overlapping use of scavenged index register!"); - // If this is the killing use, we're done - if (MI->getOperand(i).isKill()) { - if (Operand) - *Operand = i; - return I; - } + // If this is the killing use, we have a candidate. + if (MI->getOperand(i).isKill()) + isKillInsn = true; + else if (MI->getOperand(i).isDef()) + isDefInsn = true; } + if (isKillInsn && !isDefInsn) + return I; } // If we hit the end of the basic block, there was no kill of // the virtual register, which is wrong. @@ -763,10 +766,13 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); + // FIXME: The logic flow in this function is still too convoluted. + // It needs a cleanup refactoring. Do that in preparation for tracking + // more than one scratch register value and using ranges to find + // available scratch registers. unsigned CurrentVirtReg = 0; unsigned CurrentScratchReg = 0; bool havePrevValue = false; - unsigned PrevScratchReg = 0; int PrevValue = 0; MachineInstr *PrevLastUseMI = NULL; unsigned PrevLastUseOp = 0; @@ -776,37 +782,39 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // The instruction stream may change in the loop, so check BB->end() // directly. - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + bool isDefInsn = false; + bool isKillInsn = false; + bool clobbersScratchReg = false; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // If we have an active scavenged register, we shouldn't be - // seeing any references to it. - assert (Reg != CurrentScratchReg - && "overlapping use of scavenged frame index register!"); - // If we have a previous scratch reg, check and see if anything // here kills whatever value is in there. - if (Reg == PrevScratchReg) { + if (Reg == CurrentScratchReg) { if (MO.isUse()) { // Two-address operands implicitly kill - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - havePrevValue = false; - PrevScratchReg = 0; - } + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + clobbersScratchReg = true; } else { assert (MO.isDef()); - havePrevValue = false; - PrevScratchReg = 0; + clobbersScratchReg = true; } } continue; } + // If this is a def, remember that this insn defines the value. + // This lets us properly consider insns which re-use the scratch + // register, such as r2 = sub r2, #imm, in the middle of the + // scratch range. + if (MO.isDef()) + isDefInsn = true; // Have we already allocated a scratch register for this virtual? if (Reg != CurrentVirtReg) { @@ -837,50 +845,75 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // for the virtual register are exclusively for the purpose // of populating the value in the register. That's reasonable // for these frame index registers, but it's still a very, very - // strong assumption. Perhaps this implies that the frame index - // elimination should be before register allocation, with - // conservative heuristics since we'll know less then, and - // the reuse calculations done directly when doing the code-gen? + // strong assumption. rdar://7322732. Better would be to + // explicitly check each instruction in the range for references + // to the virtual register. Only delete those insns that + // touch the virtual register. // Find the last use of the new virtual register. Remove all // instruction between here and there, and update the current // instruction to reference the last use insn instead. MachineBasicBlock::iterator LastUseMI = - findLastUseReg(I, BB->end(), Reg, &i); + findLastUseReg(I, BB->end(), Reg); + // Remove all instructions up 'til the last use, since they're // just calculating the value we already have. BB->erase(I, LastUseMI); MI = I = LastUseMI; - e = MI->getNumOperands(); - CurrentScratchReg = PrevScratchReg; - // Extend the live range of the register + // Extend the live range of the scratch register PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); RS->setUsed(CurrentScratchReg); - } else { CurrentVirtReg = Reg; - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->FindUnusedReg(RC); - if (CurrentScratchReg == 0) - // No register is "free". Scavenge a register. - CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); - PrevValue = Value; + // We deleted the instruction we were scanning the operands of. + // Jump back to the instruction iterator loop. Don't increment + // past this instruction since we updated the iterator already. + DoIncr = false; + break; } + + // Scavenge a new scratch register + CurrentVirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + CurrentScratchReg = RS->FindUnusedReg(RC); + if (CurrentScratchReg == 0) + // No register is "free". Scavenge a register. + CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); + + PrevValue = Value; } + // replace this reference to the virtual register with the + // scratch register. assert (CurrentScratchReg && "Missing scratch register!"); MI->getOperand(i).setReg(CurrentScratchReg); - // If this is the last use of the register, stop tracking it. if (MI->getOperand(i).isKill()) { - PrevScratchReg = CurrentScratchReg; - PrevLastUseMI = MI; + isKillInsn = true; PrevLastUseOp = i; - CurrentScratchReg = CurrentVirtReg = 0; - havePrevValue = trackingCurrentValue; + PrevLastUseMI = MI; } } - RS->forward(MI); + } + // If this is the last use of the scratch, stop tracking it. The + // last use will be a kill operand in an instruction that does + // not also define the scratch register. + if (isKillInsn && !isDefInsn) { + CurrentVirtReg = 0; + havePrevValue = trackingCurrentValue; + } + // Similarly, notice if instruction clobbered the value in the + // register we're tracking for possible later reuse. This is noted + // above, but enforced here since the value is still live while we + // process the rest of the operands of the instruction. + if (clobbersScratchReg) { + havePrevValue = false; + CurrentScratchReg = 0; + } + if (DoIncr) { + RS->forward(I); + ++I; + } } } } diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 00c5d46..70e8640 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -63,6 +63,8 @@ namespace { virtual bool isConstant(const MachineFrameInfo *MFI) const; + virtual bool isAliased(const MachineFrameInfo *MFI) const; + virtual void printCustom(raw_ostream &OS) const { OS << "FixedStack" << FI; } @@ -89,6 +91,26 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { return false; } +bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { + if (this == getStack() || + this == getGOT() || + this == getConstantPool() || + this == getJumpTable()) + return false; + llvm_unreachable("Unknown PseudoSourceValue!"); + return true; +} + bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{ return MFI && MFI->isImmutableObjectIndex(FI); } + +bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { + // Negative frame indices are used for special things that don't + // appear in LLVM IR. Non-negative indices may be used for things + // like static allocas. + if (!MFI) + return FI >= 0; + // Spill slots should not alias others. + return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI); +} diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 6caa2d3..28ede55 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -263,7 +263,7 @@ int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { // Allocate a new stack object for this spill location... int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment()); + RC->getAlignment(),true); // Assign the slot... StackSlotForVirtReg[VirtReg] = FrameIdx; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 5f1c4e2..2518ce1 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -300,7 +300,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // If the target knows how to save/restore the register, let it do so; // otherwise, use the emergency stack spill slot. - if (!TRI->saveScavengerRegister(*MBB, I, RC, SReg)) { + if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { // Spill the scavenged register before I. assert(ScavengingFrameIndex >= 0 && "Cannot scavenge register without an emergency spill slot!"); @@ -310,8 +310,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Restore the scavenged register before its use (or first terminator). TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC); - } else - TRI->restoreScavengerRegister(*MBB, UseMI, RC, SReg); + II = prior(UseMI); + TRI->eliminateFrameIndex(II, SPAdj, NULL, this); + } ScavengeRestore = prior(UseMI); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 44e9296..43454dd 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -32,7 +32,9 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {} + : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) { + MFI = mf.getFrameInfo(); +} /// Run - perform scheduling. /// @@ -95,7 +97,8 @@ static const Value *getUnderlyingObject(const Value *V) { /// getUnderlyingObjectForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. Otherwise return null. -static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) { +static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, + const MachineFrameInfo *MFI) { if (!MI->hasOneMemOperand() || !(*MI->memoperands_begin())->getValue() || (*MI->memoperands_begin())->isVolatile()) @@ -106,10 +109,19 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) { return 0; V = getUnderlyingObject(V); - if (!isa<PseudoSourceValue>(V) && !isIdentifiedObject(V)) - return 0; + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (PSV->isAliased(MFI)) + return 0; + return V; + } - return V; + if (isIdentifiedObject(V)) + return V; + + return 0; } void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { @@ -344,7 +356,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Unknown memory accesses. Assume the worst. ChainMMO = 0; } else if (TID.mayStore()) { - if (const Value *V = getUnderlyingObjectForInstr(MI)) { + if (const Value *V = getUnderlyingObjectForInstr(MI, MFI)) { // A store to a specific PseudoSourceValue. Add precise dependencies. // Handle the def in MemDefs, if there is one. std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V); @@ -377,7 +389,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { } else if (TID.mayLoad()) { if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! - } else if (const Value *V = getUnderlyingObjectForInstr(MI)) { + } else if (const Value *V = getUnderlyingObjectForInstr(MI, MFI)) { // A load from a specific PseudoSourceValue. Add precise dependencies. std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V); if (I != MemDefs.end()) diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index 29e1c98..366c3a8 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -98,6 +98,7 @@ namespace llvm { class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG { const MachineLoopInfo &MLI; const MachineDominatorTree &MDT; + const MachineFrameInfo *MFI; /// Defs, Uses - Remember where defs and uses of each physical register /// are as we iterate upward through the instructions. This is allocated diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 95ad05e..4851d49 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1ed3082..e3f8f0f 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4381,15 +4381,17 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); if (isNegatibleForFree(N0, LegalOperations)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger() && - !N0.getOperand(0).getValueType().isVector()) { + if (N0.getOpcode() == ISD::BIT_CONVERT && + !VT.isVector() && + N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { @@ -4397,7 +4399,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), - N->getValueType(0), Int); + VT, Int); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fc01b07..7138dd2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1655,8 +1655,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, } /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and -/// condition code CC on the current target. This routine assumes LHS and rHS -/// have already been legalized by LegalizeSetCCOperands. It expands SETCC with +/// condition code CC on the current target. This routine expands SETCC with /// illegal condition code into AND / OR of multiple SETCC values. void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 859c656..e1b7022 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -617,6 +617,7 @@ private: SDValue WidenVecOp_BIT_CONVERT(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 0eafe62..dbd3e39 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -115,7 +115,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForEVT(*DAG.getContext())); + TLI.getTargetData()->getPrefTypeAlignment(NOutVT. + getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index a03f825..75e1239 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1789,6 +1789,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; @@ -1893,6 +1894,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), + N->getValueType(0), InOp, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 542bf64..37736c0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -30,6 +30,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -4600,7 +4601,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, N->InitOperands(new SDUse[NumOps], Ops, NumOps); N->OperandsNeedDelete = true; } else - MN->InitOperands(MN->OperandList, Ops, NumOps); + N->InitOperands(N->OperandList, Ops, NumOps); } // Delete any nodes that are still dead after adding the uses for the @@ -5404,14 +5405,16 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::ConstantPool: return "ConstantPool"; case ISD::ExternalSymbol: return "ExternalSymbol"; - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IID = cast<ConstantSDNode>(getOperand(0))->getZExtValue(); - return Intrinsic::getName((Intrinsic::ID)IID); - } + case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { - unsigned IID = cast<ConstantSDNode>(getOperand(1))->getZExtValue(); - return Intrinsic::getName((Intrinsic::ID)IID); + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); } case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 9017e43..adcc532 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -5485,48 +5485,6 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { DAG.setRoot(Chain); } - -void SelectionDAGLowering::visitMalloc(MallocInst &I) { - SDValue Src = getValue(I.getOperand(0)); - - // Scale up by the type size in the original i32 type width. Various - // mid-level optimizers may make assumptions about demanded bits etc from the - // i32-ness of the optimizer: we do not want to promote to i64 and then - // multiply on 64-bit targets. - // FIXME: Malloc inst should go away: PR715. - uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType()); - if (ElementSize != 1) { - // Src is always 32-bits, make sure the constant fits. - assert(Src.getValueType() == MVT::i32); - ElementSize = (uint32_t)ElementSize; - Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(), - Src, DAG.getConstant(ElementSize, Src.getValueType())); - } - - EVT IntPtr = TLI.getPointerTy(); - - Src = DAG.getZExtOrTrunc(Src, getCurDebugLoc(), IntPtr); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Src; - Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); - Args.push_back(Entry); - - bool isTailCall = PerformTailCallOpt && - isInTailCallPosition(&I, Attribute::None, TLI); - std::pair<SDValue,SDValue> Result = - TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, - 0, CallingConv::C, isTailCall, - /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("malloc", IntPtr), - Args, DAG, getCurDebugLoc()); - if (Result.first.getNode()) - setValue(&I, Result.first); // Pointers always fit in registers - if (Result.second.getNode()) - DAG.setRoot(Result.second); -} - void SelectionDAGLowering::visitFree(FreeInst &I) { TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index 06acc8a..722b1d8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -60,7 +60,6 @@ class MachineFunction; class MachineInstr; class MachineModuleInfo; class MachineRegisterInfo; -class MallocInst; class PHINode; class PtrToIntInst; class ReturnInst; @@ -529,7 +528,6 @@ private: void visitGetElementPtr(User &I); void visitSelect(User &I); - void visitMalloc(MallocInst &I); void visitFree(FreeInst &I); void visitAlloca(AllocaInst &I); void visitLoad(LoadInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ae98da5..72e7f58 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -226,7 +226,7 @@ static void EmitLiveInCopy(MachineBasicBlock *MBB, assert(Emitted && "Unable to issue a live-in copy instruction!\n"); (void) Emitted; -CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); if (Coalesced) { if (&*InsertPos == UseMI) ++InsertPos; MBB->erase(UseMI); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 350bc6e..0204969 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -111,11 +111,16 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) + if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { + // We apparently only care about character arrays. + if (AT->getElementType() != Type::getInt8Ty(AT->getContext())) + continue; + // If an array has more than SSPBufferSize bytes of allocated space, // then we emit stack protectors. if (SSPBufferSize <= TD->getTypeAllocSize(AT)) return true; + } } } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index c78f35b..cac098b 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -118,7 +118,7 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { "attempt to assign stack slot to already spilled register"); const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment()); + RC->getAlignment(), /*isSS*/true); if (LowSpillSlot == NO_STACK_SLOT) LowSpillSlot = SS; if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) @@ -162,7 +162,7 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { if (I != EmergencySpillSlots.end()) return I->second; int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment()); + RC->getAlignment(), /*isSS*/true); if (LowSpillSlot == NO_STACK_SLOT) LowSpillSlot = SS; if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp index 3e1fc9f..c581809 100644 --- a/lib/CompilerDriver/Main.cpp +++ b/lib/CompilerDriver/Main.cpp @@ -95,8 +95,7 @@ int Main(int argc, char** argv) { (argc, argv, "LLVM Compiler Driver (Work In Progress)", true); PluginLoader Plugins; - Plugins.PopulateLanguageMap(langMap); - Plugins.PopulateCompilationGraph(graph); + Plugins.RunInitialization(langMap, graph); if (CheckGraph) { int ret = graph.Check(); diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp index 7310d12..0fdfef4 100644 --- a/lib/CompilerDriver/Plugin.cpp +++ b/lib/CompilerDriver/Plugin.cpp @@ -62,18 +62,17 @@ namespace llvmc { pluginListInitialized = false; } - void PluginLoader::PopulateLanguageMap(LanguageMap& langMap) { + void PluginLoader::RunInitialization(LanguageMap& langMap, + CompilationGraph& graph) const + { llvm::sys::SmartScopedLock<true> Lock(*PluginMutex); for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); - B != E; ++B) - (*B)->PopulateLanguageMap(langMap); - } - - void PluginLoader::PopulateCompilationGraph(CompilationGraph& graph) { - llvm::sys::SmartScopedLock<true> Lock(*PluginMutex); - for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); - B != E; ++B) - (*B)->PopulateCompilationGraph(graph); + B != E; ++B) { + const BasePlugin* BP = *B; + BP->PreprocessOptions(); + BP->PopulateLanguageMap(langMap); + BP->PopulateCompilationGraph(graph); + } } } diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt index 41b3b4e..42020d6 100644 --- a/lib/ExecutionEngine/JIT/CMakeLists.txt +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -8,7 +8,6 @@ add_llvm_library(LLVMJIT JITDwarfEmitter.cpp JITEmitter.cpp JITMemoryManager.cpp - MacOSJITEventListener.cpp OProfileJITEventListener.cpp TargetSelect.cpp ) diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index fa64010..49faf64 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -22,6 +22,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Mutex.h" diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index eacd9f9..073d6fb 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -42,6 +42,7 @@ #include "llvm/System/Disassembler.h" #include "llvm/System/Memory.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -63,17 +64,20 @@ static JIT *TheJIT = 0; namespace { class JITResolverState { public: - typedef std::map<AssertingVH<Function>, void*> FunctionToStubMapTy; - typedef std::map<void*, AssertingVH<Function> > StubToFunctionMapTy; + typedef DenseMap<AssertingVH<Function>, void*> FunctionToStubMapTy; + typedef std::map<void*, AssertingVH<Function> > CallSiteToFunctionMapTy; + typedef DenseMap<AssertingVH<Function>, SmallPtrSet<void*, 1> > + FunctionToCallSitesMapTy; typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy; private: /// FunctionToStubMap - Keep track of the stub created for a particular /// function so that we can reuse them if necessary. FunctionToStubMapTy FunctionToStubMap; - /// StubToFunctionMap - Keep track of the function that each stub - /// corresponds to. - StubToFunctionMapTy StubToFunctionMap; + /// CallSiteToFunctionMap - Keep track of the function that each lazy call + /// site corresponds to, and vice versa. + CallSiteToFunctionMapTy CallSiteToFunctionMap; + FunctionToCallSitesMapTy FunctionToCallSitesMap; /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a /// particular GlobalVariable so that we can reuse them if necessary. @@ -85,14 +89,78 @@ namespace { return FunctionToStubMap; } - StubToFunctionMapTy& getStubToFunctionMap(const MutexGuard& locked) { + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) { assert(locked.holds(TheJIT->lock)); - return StubToFunctionMap; + return GlobalToIndirectSymMap; } - GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) { + pair<void *, Function *> LookupFunctionFromCallSite( + const MutexGuard &locked, void *CallSite) const { assert(locked.holds(TheJIT->lock)); - return GlobalToIndirectSymMap; + + // The address given to us for the stub may not be exactly right, it might be + // a little bit after the stub. As such, use upper_bound to find it. + CallSiteToFunctionMapTy::const_iterator I = + CallSiteToFunctionMap.upper_bound(CallSite); + assert(I != CallSiteToFunctionMap.begin() && + "This is not a known call site!"); + --I; + return *I; + } + + void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) { + assert(locked.holds(TheJIT->lock)); + + assert(CallSiteToFunctionMap.insert(std::make_pair(CallSite, F)).second && + "Pair was already in CallSiteToFunctionMap"); + FunctionToCallSitesMap[F].insert(CallSite); + } + + // Returns the Function of the stub if a stub was erased, or NULL if there + // was no stub. This function uses the call-site->function map to find a + // relevant function, but asserts that only stubs and not other call sites + // will be passed in. + Function *EraseStub(const MutexGuard &locked, void *Stub) { + CallSiteToFunctionMapTy::iterator C2F_I = + CallSiteToFunctionMap.find(Stub); + if (C2F_I == CallSiteToFunctionMap.end()) { + // Not a stub. + return NULL; + } + + Function *const F = C2F_I->second; +#ifndef NDEBUG + void *RealStub = FunctionToStubMap.lookup(F); + assert(RealStub == Stub && + "Call-site that wasn't a stub pass in to EraseStub"); +#endif + FunctionToStubMap.erase(F); + CallSiteToFunctionMap.erase(C2F_I); + + // Remove the stub from the function->call-sites map, and remove the whole + // entry from the map if that was the last call site. + FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F); + assert(F2C_I != FunctionToCallSitesMap.end() && + "FunctionToCallSitesMap broken"); + assert(F2C_I->second.erase(Stub) && + "FunctionToCallSitesMap broken"); + if (F2C_I->second.empty()) + FunctionToCallSitesMap.erase(F2C_I); + + return F; + } + + void EraseAllCallSites(const MutexGuard &locked, Function *F) { + assert(locked.holds(TheJIT->lock)); + FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); + if (F2C == FunctionToCallSitesMap.end()) + return; + for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(), + E = F2C->second.end(); I != E; ++I) { + assert(CallSiteToFunctionMap.erase(*I) == 1 && + "Missing call site->function mapping"); + } + FunctionToCallSitesMap.erase(F2C); } }; @@ -100,7 +168,7 @@ namespace { /// have not yet been compiled. class JITResolver { typedef JITResolverState::FunctionToStubMapTy FunctionToStubMapTy; - typedef JITResolverState::StubToFunctionMapTy StubToFunctionMapTy; + typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy; typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy; /// LazyResolverFn - The target lazy resolver function that we actually @@ -154,7 +222,7 @@ namespace { void *AddCallbackAtLocation(Function *F, void *Location) { MutexGuard locked(TheJIT->lock); /// Get the target-specific JIT resolver function. - state.getStubToFunctionMap(locked)[Location] = F; + state.AddCallSite(locked, Location, F); return (void*)(intptr_t)LazyResolverFn; } @@ -183,8 +251,7 @@ void *JITResolver::getFunctionStubIfAvailable(Function *F) { MutexGuard locked(TheJIT->lock); // If we already have a stub for this function, recycle it. - void *&Stub = state.getFunctionToStubMap(locked)[F]; - return Stub; + return state.getFunctionToStubMap(locked).lookup(F); } /// getFunctionStub - This returns a pointer to a function stub, creating @@ -230,7 +297,7 @@ void *JITResolver::getFunctionStub(Function *F) { // Finally, keep track of the stub-to-Function mapping so that the // JITCompilerFn knows which function to compile! - state.getStubToFunctionMap(locked)[Stub] = F; + state.AddCallSite(locked, Stub, F); // If we are JIT'ing non-lazily but need to call a function that does not // exist yet, add it to the JIT's work list so that we can fill in the stub @@ -291,10 +358,11 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, SmallVectorImpl<void*> &Ptrs) { MutexGuard locked(TheJIT->lock); - FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); + const FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - for (FunctionToStubMapTy::iterator i = FM.begin(), e = FM.end(); i != e; ++i){ + for (FunctionToStubMapTy::const_iterator i = FM.begin(), e = FM.end(); + i != e; ++i){ Function *F = i->first; if (F->isDeclaration() && F->hasExternalLinkage()) { GVs.push_back(i->first); @@ -310,20 +378,15 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, GlobalValue *JITResolver::invalidateStub(void *Stub) { MutexGuard locked(TheJIT->lock); - - FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); - StubToFunctionMapTy &SM = state.getStubToFunctionMap(locked); + GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - + // Look up the cheap way first, to see if it's a function stub we are // invalidating. If so, remove it from both the forward and reverse maps. - if (SM.find(Stub) != SM.end()) { - Function *F = SM[Stub]; - SM.erase(Stub); - FM.erase(F); + if (Function *F = state.EraseStub(locked, Stub)) { return F; } - + // Otherwise, it might be an indirect symbol stub. Find it and remove it. for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); i != e; ++i) { @@ -361,14 +424,12 @@ void *JITResolver::JITCompilerFn(void *Stub) { // JIT lock to be unlocked. MutexGuard locked(TheJIT->lock); - // The address given to us for the stub may not be exactly right, it might be - // a little bit after the stub. As such, use upper_bound to find it. - StubToFunctionMapTy::iterator I = - JR.state.getStubToFunctionMap(locked).upper_bound(Stub); - assert(I != JR.state.getStubToFunctionMap(locked).begin() && - "This is not a known stub!"); - F = (--I)->second; - ActualPtr = I->first; + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. As such, use upper_bound to find it. + pair<void*, Function*> I = + JR.state.LookupFunctionFromCallSite(locked, Stub); + F = I.second; + ActualPtr = I.first; } // If we have already code generated the function, just return the address. @@ -383,25 +444,21 @@ void *JITResolver::JITCompilerFn(void *Stub) { + F->getName() + "' when lazy compiles are disabled!"); } - // We might like to remove the stub from the StubToFunction map. - // We can't do that! Multiple threads could be stuck, waiting to acquire the - // lock above. As soon as the 1st function finishes compiling the function, - // the next one will be released, and needs to be able to find the function - // it needs to call. - //JR.state.getStubToFunctionMap(locked).erase(I); - DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName() << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); Result = TheJIT->getPointerToFunction(F); } - - // Reacquire the lock to erase the stub in the map. + + // Reacquire the lock to update the GOT map. MutexGuard locked(TheJIT->lock); - // We don't need to reuse this stub in the future, as F is now compiled. - JR.state.getFunctionToStubMap(locked).erase(F); + // We might like to remove the call site from the CallSiteToFunction map, but + // we can't do that! Multiple threads could be stuck, waiting to acquire the + // lock above. As soon as the 1st function finishes compiling the function, + // the next one will be released, and needs to be able to find the function it + // needs to call. // FIXME: We could rewrite all references to this stub if we knew them. @@ -492,6 +549,13 @@ namespace { /// finishFunction. JITEvent_EmittedFunctionDetails EmissionDetails; + struct EmittedCode { + void *FunctionBody; + void *ExceptionTable; + EmittedCode() : FunctionBody(0), ExceptionTable(0) {} + }; + DenseMap<const Function *, EmittedCode> EmittedFunctions; + // CurFnStubUses - For a given Function, a vector of stubs that it // references. This facilitates the JIT detecting that a stub is no // longer used, so that it may be deallocated. @@ -955,7 +1019,8 @@ void JITEmitter::startFunction(MachineFunction &F) { BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(), ActualSize); BufferEnd = BufferBegin+ActualSize; - + EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin; + // Ensure the constant pool/jump table info is at least 4-byte aligned. emitAlignment(16); @@ -1145,6 +1210,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), ActualSize); BufferEnd = BufferBegin+ActualSize; + EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin; uint8_t *EhStart; uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, EhStart); @@ -1188,7 +1254,13 @@ void JITEmitter::retryWithMoreMemory(MachineFunction &F) { /// deallocateMemForFunction - Deallocate all memory for the specified /// function body. Also drop any references the function has to stubs. void JITEmitter::deallocateMemForFunction(const Function *F) { - MemMgr->deallocateMemForFunction(F); + DenseMap<const Function *, EmittedCode>::iterator Emitted = + EmittedFunctions.find(F); + if (Emitted != EmittedFunctions.end()) { + MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody); + MemMgr->deallocateExceptionTable(Emitted->second.ExceptionTable); + EmittedFunctions.erase(Emitted); + } // TODO: Do we need to unregister exception handling information from libgcc // here? diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 474843f..3796624 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -297,9 +297,6 @@ namespace { uint8_t *GOTBase; // Target Specific reserved memory void *DlsymTable; // Stub external symbol information - - std::map<const Function*, MemoryRangeHeader*> FunctionBlocks; - std::map<const Function*, MemoryRangeHeader*> TableBlocks; public: DefaultJITMemoryManager(); ~DefaultJITMemoryManager(); @@ -414,7 +411,6 @@ namespace { "Mismatched function start/end!"); uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock; - FunctionBlocks[F] = CurBlock; // Release the memory at the end of this block that isn't needed. FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); @@ -464,7 +460,6 @@ namespace { "Mismatched table start/end!"); uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock; - TableBlocks[F] = CurBlock; // Release the memory at the end of this block that isn't needed. FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); @@ -478,15 +473,9 @@ namespace { return DlsymTable; } - /// deallocateMemForFunction - Deallocate all memory for the specified - /// function body. - void deallocateMemForFunction(const Function *F) { - std::map<const Function*, MemoryRangeHeader*>::iterator - I = FunctionBlocks.find(F); - if (I == FunctionBlocks.end()) return; - + void deallocateBlock(void *Block) { // Find the block that is allocated for this function. - MemoryRangeHeader *MemRange = I->second; + MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1; assert(MemRange->ThisAllocated && "Block isn't allocated!"); // Fill the buffer with garbage! @@ -496,27 +485,18 @@ namespace { // Free the memory. FreeMemoryList = MemRange->FreeBlock(FreeMemoryList); - - // Finally, remove this entry from FunctionBlocks. - FunctionBlocks.erase(I); - - I = TableBlocks.find(F); - if (I == TableBlocks.end()) return; - - // Find the block that is allocated for this function. - MemRange = I->second; - assert(MemRange->ThisAllocated && "Block isn't allocated!"); + } - // Fill the buffer with garbage! - if (PoisonMemory) { - memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange)); - } + /// deallocateFunctionBody - Deallocate all memory for the specified + /// function body. + void deallocateFunctionBody(void *Body) { + if (Body) deallocateBlock(Body); + } - // Free the memory. - FreeMemoryList = MemRange->FreeBlock(FreeMemoryList); - - // Finally, remove this entry from TableBlocks. - TableBlocks.erase(I); + /// deallocateExceptionTable - Deallocate memory for the specified + /// exception table. + void deallocateExceptionTable(void *ET) { + if (ET) deallocateBlock(ET); } /// setMemoryWritable - When code generation is in progress, diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp index 69398be..00c4af7 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -69,16 +69,16 @@ OProfileJITEventListener::~OProfileJITEventListener() { } class FilenameCache { - // Holds the filename of each CompileUnit, so that we can pass the + // Holds the filename of each Scope, so that we can pass the // pointer into oprofile. These char*s are freed in the destructor. DenseMap<MDNode*, char*> Filenames; public: - const char *getFilename(MDNode *CompileUnit) { - char *&Filename = Filenames[CompileUnit]; + const char *getFilename(MDNode *Scope) { + char *&Filename = Filenames[Scope]; if (Filename == NULL) { - DICompileUnit CU(CompileUnit); - Filename = strdup(CU.getFilename()); + DIScope S(Scope); + Filename = strdup(S.getFilename()); } return Filename; } @@ -97,7 +97,7 @@ static debug_line_info LineStartToOProfileFormat( Result.vma = Address; const DebugLocTuple &tuple = MF.getDebugLocTuple(Loc); Result.lineno = tuple.Line; - Result.filename = Filenames.getFilename(tuple.CompileUnit); + Result.filename = Filenames.getFilename(tuple.Scope); DEBUG(errs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to " << Result.filename << ":" << Result.lineno << "\n"); return Result; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index e56e968..e939f37 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -123,23 +123,27 @@ void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { OS << " = "; Value->print(OS, &MAI); OS << '\n'; + + // FIXME: Lift context changes into super class. + // FIXME: Set associated section. + Symbol->setValue(Value); } -void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, +void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute) { switch (Attribute) { - case Global: OS << ".globl"; break; - case Hidden: OS << ".hidden"; break; + case Global: OS << ".globl"; break; + case Hidden: OS << ".hidden"; break; case IndirectSymbol: OS << ".indirect_symbol"; break; - case Internal: OS << ".internal"; break; - case LazyReference: OS << ".lazy_reference"; break; - case NoDeadStrip: OS << ".no_dead_strip"; break; - case PrivateExtern: OS << ".private_extern"; break; - case Protected: OS << ".protected"; break; - case Reference: OS << ".reference"; break; - case Weak: OS << ".weak"; break; + case Internal: OS << ".internal"; break; + case LazyReference: OS << ".lazy_reference"; break; + case NoDeadStrip: OS << ".no_dead_strip"; break; + case PrivateExtern: OS << ".private_extern"; break; + case Protected: OS << ".protected"; break; + case Reference: OS << ".reference"; break; + case Weak: OS << ".weak"; break; case WeakDefinition: OS << ".weak_definition"; break; - case WeakReference: OS << ".weak_reference"; break; + case WeakReference: OS << ".weak_reference"; break; } OS << ' '; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 0afdf98..4f39f1e 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -9,7 +9,10 @@ #define DEBUG_TYPE "assembler" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Target/TargetMachOWriterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" @@ -48,7 +51,7 @@ class MachObjectWriter { Header_Magic32 = 0xFEEDFACE, Header_Magic64 = 0xFEEDFACF }; - + static const unsigned Header32Size = 28; static const unsigned Header64Size = 32; static const unsigned SegmentLoadCommand32Size = 56; @@ -127,7 +130,7 @@ class MachObjectWriter { bool IsLSB; public: - MachObjectWriter(raw_ostream &_OS, bool _IsLSB = true) + MachObjectWriter(raw_ostream &_OS, bool _IsLSB = true) : OS(_OS), IsLSB(_IsLSB) { } @@ -170,10 +173,10 @@ public: void WriteZeros(unsigned N) { const char Zeros[16] = { 0 }; - + for (unsigned i = 0, e = N / 16; i != e; ++i) OS << StringRef(Zeros, 16); - + OS << StringRef(Zeros, N % 16); } @@ -184,7 +187,7 @@ public: } /// @} - + void WriteHeader32(unsigned NumLoadCommands, unsigned LoadCommandsSize, bool SubsectionsViaSymbols) { uint32_t Flags = 0; @@ -384,7 +387,7 @@ public: Write32(MSD.StringIndex); Write8(Type); Write8(MSD.SectionIndex); - + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' // value. Write16(Flags); @@ -397,6 +400,7 @@ public: }; void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCSectionData::Fixup &Fixup, + const MCValue &Target, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; @@ -404,13 +408,12 @@ public: unsigned Type = RIT_Vanilla; // See <reloc.h>. - - const MCSymbol *A = Fixup.Value.getSymA(); + const MCSymbol *A = Target.getSymA(); MCSymbolData *SD = SymbolMap.lookup(A); uint32_t Value = SD->getFragment()->getAddress() + SD->getOffset(); uint32_t Value2 = 0; - if (const MCSymbol *B = Fixup.Value.getSymB()) { + if (const MCSymbol *B = Target.getSymB()) { Type = RIT_LocalDifference; MCSymbolData *SD = SymbolMap.lookup(B); @@ -421,7 +424,7 @@ public: assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); // The value which goes in the fixup is current value of the expression. - Fixup.FixedValue = Value - Value2 + Fixup.Value.getConstant(); + Fixup.FixedValue = Value - Value2 + Target.getConstant(); MachRelocationEntry MRE; MRE.Word0 = ((Address << 0) | @@ -450,14 +453,18 @@ public: MCSectionData::Fixup &Fixup, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { - // If this is a local symbol plus an offset or a difference, then we need a + MCValue Target; + if (!Fixup.Value->EvaluateAsRelocatable(Target)) + llvm_report_error("expected relocatable expression"); + + // If this is a difference or a local symbol plus an offset, then we need a // scattered relocation entry. - if (Fixup.Value.getSymB()) // a - b - return ComputeScatteredRelocationInfo(Asm, Fixup, SymbolMap, Relocs); - if (Fixup.Value.getSymA() && Fixup.Value.getConstant()) - if (!Fixup.Value.getSymA()->isUndefined()) - return ComputeScatteredRelocationInfo(Asm, Fixup, SymbolMap, Relocs); - + if (Target.getSymB() || + (Target.getSymA() && !Target.getSymA()->isUndefined() && + Target.getConstant())) + return ComputeScatteredRelocationInfo(Asm, Fixup, Target, + SymbolMap, Relocs); + // See <reloc.h>. uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; uint32_t Value = 0; @@ -466,15 +473,15 @@ public: unsigned IsExtern = 0; unsigned Type = 0; - if (Fixup.Value.isAbsolute()) { // constant + if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. Type = RIT_Vanilla; Value = 0; llvm_unreachable("FIXME: Not yet implemented!"); } else { - const MCSymbol *Symbol = Fixup.Value.getSymA(); + const MCSymbol *Symbol = Target.getSymA(); MCSymbolData *SD = SymbolMap.lookup(Symbol); - + if (Symbol->isUndefined()) { IsExtern = 1; Index = SD->getIndex(); @@ -495,7 +502,7 @@ public: } // The value which goes in the fixup is current value of the expression. - Fixup.FixedValue = Value + Fixup.Value.getConstant(); + Fixup.FixedValue = Value + Target.getConstant(); unsigned Log2Size = Log2_32(Fixup.Size); assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); @@ -510,7 +517,7 @@ public: (Type << 28)); Relocs.push_back(MRE); } - + void BindIndirectSymbols(MCAssembler &Asm, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap) { // This is the point where 'as' creates actual symbols for indirect symbols @@ -703,7 +710,7 @@ public: if (NumSymbols) ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, UndefinedSymbolData); - + // The section data starts after the header, the segment load command (and // section headers) and the symbol table. unsigned NumLoadCommands = 1; @@ -733,7 +740,7 @@ public: SectionDataSize = std::max(SectionDataSize, SD.getAddress() + SD.getSize()); - SectionDataFileSize = std::max(SectionDataFileSize, + SectionDataFileSize = std::max(SectionDataFileSize, SD.getAddress() + SD.getFileSize()); } @@ -748,9 +755,9 @@ public: Asm.getSubsectionsViaSymbols()); WriteSegmentLoadCommand32(NumSections, VMSize, SectionDataStart, SectionDataSize); - + // ... and then the section headers. - // + // // We also compute the section relocations while we do this. Note that // compute relocation info will also update the fixup to have the correct // value; this will be overwrite the appropriate data in the fragment when @@ -774,7 +781,7 @@ public: WriteSection32(SD, SectionStart, RelocTableEnd, NumRelocs); RelocTableEnd += NumRelocs * RelocationInfoSize; } - + // Write the symbol table load command, if used. if (NumSymbols) { unsigned FirstLocalSymbol = 0; @@ -923,7 +930,7 @@ MCSectionData::LookupFixup(const MCFragment *Fragment, uint64_t Offset) const { return 0; } - + /* *** */ MCSymbolData::MCSymbolData() : Symbol(0) {} @@ -960,7 +967,7 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { switch (F.getKind()) { case MCFragment::FT_Align: { MCAlignFragment &AF = cast<MCAlignFragment>(F); - + uint64_t Size = OffsetToAlignment(Address, AF.getAlignment()); if (Size > AF.getMaxBytesToEmit()) AF.setFileSize(0); @@ -978,8 +985,12 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { F.setFileSize(F.getMaxFileSize()); + MCValue Target; + if (!FF.getValue().EvaluateAsRelocatable(Target)) + llvm_report_error("expected relocatable expression"); + // If the fill value is constant, thats it. - if (FF.getValue().isAbsolute()) + if (Target.isAbsolute()) break; // Otherwise, add fixups for the values. @@ -994,19 +1005,23 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { case MCFragment::FT_Org: { MCOrgFragment &OF = cast<MCOrgFragment>(F); - if (!OF.getOffset().isAbsolute()) + MCValue Target; + if (!OF.getOffset().EvaluateAsRelocatable(Target)) + llvm_report_error("expected relocatable expression"); + + if (!Target.isAbsolute()) llvm_unreachable("FIXME: Not yet implemented!"); - uint64_t OrgOffset = OF.getOffset().getConstant(); + uint64_t OrgOffset = Target.getConstant(); uint64_t Offset = Address - SD.getAddress(); // FIXME: We need a way to communicate this error. if (OrgOffset < Offset) - llvm_report_error("invalid .org offset '" + Twine(OrgOffset) + + llvm_report_error("invalid .org offset '" + Twine(OrgOffset) + "' (at offset '" + Twine(Offset) + "'"); - + F.setFileSize(OrgOffset - Offset); break; - } + } case MCFragment::FT_ZeroFill: { MCZeroFillFragment &ZFF = cast<MCZeroFillFragment>(F); @@ -1038,7 +1053,7 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, MachObjectWriter &MOW) { uint64_t Start = OS.tell(); (void) Start; - + ++EmittedFragments; // FIXME: Embed in fragments instead? @@ -1051,8 +1066,8 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, // multiple .align directives to enforce the semantics it wants), but is // severe enough that we want to report it. How to handle this? if (Count * AF.getValueSize() != AF.getFileSize()) - llvm_report_error("undefined .align directive, value size '" + - Twine(AF.getValueSize()) + + llvm_report_error("undefined .align directive, value size '" + + Twine(AF.getValueSize()) + "' is not a divisor of padding size '" + Twine(AF.getFileSize()) + "'"); @@ -1077,10 +1092,15 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, MCFillFragment &FF = cast<MCFillFragment>(F); int64_t Value = 0; - if (FF.getValue().isAbsolute()) - Value = FF.getValue().getConstant(); + + MCValue Target; + if (!FF.getValue().EvaluateAsRelocatable(Target)) + llvm_report_error("expected relocatable expression"); + + if (Target.isAbsolute()) + Value = Target.getConstant(); for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - if (!FF.getValue().isAbsolute()) { + if (!Target.isAbsolute()) { // Find the fixup. // // FIXME: Find a better way to write in the fixes. @@ -1101,7 +1121,7 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, } break; } - + case MCFragment::FT_Org: { MCOrgFragment &OF = cast<MCOrgFragment>(F); @@ -1131,7 +1151,7 @@ static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, uint64_t Start = OS.tell(); (void) Start; - + for (MCSectionData::const_iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) WriteFileData(OS, *it, MOW); diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index f36564a..09479c5 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -8,10 +8,11 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCContext.h" - #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" using namespace llvm; MCContext::MCContext() { @@ -38,6 +39,13 @@ MCSymbol *MCContext::GetOrCreateSymbol(const StringRef &Name) { return Entry = new (*this) MCSymbol(Name, false); } +MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { + SmallString<128> NameSV; + Name.toVector(NameSV); + return GetOrCreateSymbol(NameSV.str()); +} + + MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) { // If unnamed, just create a symbol. if (Name.empty()) @@ -52,20 +60,3 @@ MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) { MCSymbol *MCContext::LookupSymbol(const StringRef &Name) const { return Symbols.lookup(Name); } - -void MCContext::ClearSymbolValue(const MCSymbol *Sym) { - SymbolValues.erase(Sym); -} - -void MCContext::SetSymbolValue(const MCSymbol *Sym, const MCValue &Value) { - SymbolValues[Sym] = Value; -} - -const MCValue *MCContext::GetSymbolValue(const MCSymbol *Sym) const { - DenseMap<const MCSymbol*, MCValue>::iterator it = SymbolValues.find(Sym); - - if (it == SymbolValues.end()) - return 0; - - return &it->second; -} diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 0f3e053..c950ff2 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -141,10 +141,10 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(const StringRef &Name, /* *** */ -bool MCExpr::EvaluateAsAbsolute(MCContext &Ctx, int64_t &Res) const { +bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const { MCValue Value; - if (!EvaluateAsRelocatable(Ctx, Value) || !Value.isAbsolute()) + if (!EvaluateAsRelocatable(Value) || !Value.isAbsolute()) return false; Res = Value.getConstant(); @@ -173,7 +173,7 @@ static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A, return true; } -bool MCExpr::EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const { +bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const { switch (getKind()) { case Constant: Res = MCValue::get(cast<MCConstantExpr>(this)->getValue()); @@ -181,10 +181,12 @@ bool MCExpr::EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const { case SymbolRef: { const MCSymbol &Sym = cast<MCSymbolRefExpr>(this)->getSymbol(); - if (const MCValue *Value = Ctx.GetSymbolValue(&Sym)) - Res = *Value; - else - Res = MCValue::get(&Sym, 0, 0); + + // Evaluate recursively if this is a variable. + if (Sym.isVariable()) + return Sym.getValue()->EvaluateAsRelocatable(Res); + + Res = MCValue::get(&Sym, 0, 0); return true; } @@ -192,7 +194,7 @@ bool MCExpr::EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const { const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this); MCValue Value; - if (!AUE->getSubExpr()->EvaluateAsRelocatable(Ctx, Value)) + if (!AUE->getSubExpr()->EvaluateAsRelocatable(Value)) return false; switch (AUE->getOpcode()) { @@ -225,8 +227,8 @@ bool MCExpr::EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const { const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this); MCValue LHSValue, RHSValue; - if (!ABE->getLHS()->EvaluateAsRelocatable(Ctx, LHSValue) || - !ABE->getRHS()->EvaluateAsRelocatable(Ctx, RHSValue)) + if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue) || + !ABE->getRHS()->EvaluateAsRelocatable(RHSValue)) return false; // We only support a few operations on non-constant expressions, handle diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index e04bd1f..189f072 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -198,7 +198,9 @@ void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { assert((Symbol->isUndefined() || Symbol->isAbsolute()) && "Cannot define a symbol twice!"); - llvm_unreachable("FIXME: Not yet implemented!"); + // FIXME: Lift context changes into super class. + // FIXME: Set associated section. + Symbol->setValue(Value); } void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, @@ -321,12 +323,7 @@ void MCMachOStreamer::EmitBytes(const StringRef &Data) { } void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size) { - MCValue RelocValue; - - if (!AddValueSymbols(Value)->EvaluateAsRelocatable(getContext(), RelocValue)) - return llvm_report_error("expected relocatable expression"); - - new MCFillFragment(RelocValue, Size, 1, CurSectionData); + new MCFillFragment(*AddValueSymbols(Value), Size, 1, CurSectionData); } void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, @@ -344,13 +341,7 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - MCValue RelocOffset; - - if (!AddValueSymbols(Offset)->EvaluateAsRelocatable(getContext(), - RelocOffset)) - return llvm_report_error("expected relocatable expression"); - - new MCOrgFragment(RelocOffset, Value, CurSectionData); + new MCOrgFragment(*Offset, Value, CurSectionData); } void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index e431d27..7e42e2d 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -48,6 +48,7 @@ namespace llvm { unsigned int arithmeticOK; }; + const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true }; const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; @@ -2812,6 +2813,35 @@ APFloat::convertFloatAPFloatToAPInt() const (mysignificand & 0x7fffff))); } +APInt +APFloat::convertHalfAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); + assert (partCount()==1); + + uint32_t myexponent, mysignificand; + + if (category==fcNormal) { + myexponent = exponent+15; //bias + mysignificand = (uint32_t)*significandParts(); + if (myexponent == 1 && !(mysignificand & 0x400)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x1f; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x1f; + mysignificand = (uint32_t)*significandParts(); + } + + return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | + (mysignificand & 0x3ff))); +} + // This function creates an APInt that is just a bit map of the floating // point constant as it would appear in memory. It is not a conversion, // and treating the result as a normal integer is unlikely to be useful. @@ -2819,6 +2849,9 @@ APFloat::convertFloatAPFloatToAPInt() const APInt APFloat::bitcastToAPInt() const { + if (semantics == (const llvm::fltSemantics*)&IEEEhalf) + return convertHalfAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&IEEEsingle) return convertFloatAPFloatToAPInt(); @@ -3051,6 +3084,39 @@ APFloat::initFromFloatAPInt(const APInt & api) } } +void +APFloat::initFromHalfAPInt(const APInt & api) +{ + assert(api.getBitWidth()==16); + uint32_t i = (uint32_t)*api.getRawData(); + uint32_t myexponent = (i >> 10) & 0x1f; + uint32_t mysignificand = i & 0x3ff; + + initialize(&APFloat::IEEEhalf); + assert(partCount()==1); + + sign = i >> 15; + if (myexponent==0 && mysignificand==0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x1f && mysignificand==0) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x1f && mysignificand!=0) { + // sign, exponent, significand meaningless + category = fcNaN; + *significandParts() = mysignificand; + } else { + category = fcNormal; + exponent = myexponent - 15; //bias + *significandParts() = mysignificand; + if (myexponent==0) // denormal + exponent = -14; + else + *significandParts() |= 0x400; // integer bit + } +} + /// Treat api as containing the bits of a floating point number. Currently /// we infer the floating point type from the size of the APInt. The /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful @@ -3058,7 +3124,9 @@ APFloat::initFromFloatAPInt(const APInt & api) void APFloat::initFromAPInt(const APInt& api, bool isIEEE) { - if (api.getBitWidth() == 32) + if (api.getBitWidth() == 16) + return initFromHalfAPInt(api); + else if (api.getBitWidth() == 32) return initFromFloatAPInt(api); else if (api.getBitWidth()==64) return initFromDoubleAPInt(api); diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index 1618086..c72f121 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -56,59 +56,3 @@ void llvm::SplitString(const std::string &Source, S2 = getToken(S, Delimiters); } } - - - -/// UnescapeString - Modify the argument string, turning two character sequences -/// @verbatim -/// like '\\' 'n' into '\n'. This handles: \e \a \b \f \n \r \t \v \' \ and -/// \num (where num is a 1-3 byte octal value). -/// @endverbatim -void llvm::UnescapeString(std::string &Str) { - for (unsigned i = 0; i != Str.size(); ++i) { - if (Str[i] == '\\' && i != Str.size()-1) { - switch (Str[i+1]) { - default: continue; // Don't execute the code after the switch. - case 'a': Str[i] = '\a'; break; - case 'b': Str[i] = '\b'; break; - case 'e': Str[i] = 27; break; - case 'f': Str[i] = '\f'; break; - case 'n': Str[i] = '\n'; break; - case 'r': Str[i] = '\r'; break; - case 't': Str[i] = '\t'; break; - case 'v': Str[i] = '\v'; break; - case '"': Str[i] = '\"'; break; - case '\'': Str[i] = '\''; break; - case '\\': Str[i] = '\\'; break; - } - // Nuke the second character. - Str.erase(Str.begin()+i+1); - } - } -} - -/// EscapeString - Modify the argument string, turning '\\' and anything that -/// doesn't satisfy std::isprint into an escape sequence. -void llvm::EscapeString(std::string &Str) { - for (unsigned i = 0; i != Str.size(); ++i) { - if (Str[i] == '\\') { - ++i; - Str.insert(Str.begin()+i, '\\'); - } else if (Str[i] == '\t') { - Str[i++] = '\\'; - Str.insert(Str.begin()+i, 't'); - } else if (Str[i] == '"') { - Str.insert(Str.begin()+i++, '\\'); - } else if (Str[i] == '\n') { - Str[i++] = '\\'; - Str.insert(Str.begin()+i, 'n'); - } else if (!std::isprint(Str[i])) { - // Always expand to a 3-digit octal escape. - unsigned Char = Str[i]; - Str[i++] = '\\'; - Str.insert(Str.begin()+i++, '0'+((Char/64) & 7)); - Str.insert(Str.begin()+i++, '0'+((Char/8) & 7)); - Str.insert(Str.begin()+i , '0'+( Char & 7)); - } - } -} diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 040308b..a729d3d 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringExtras.h" #include <cassert> using namespace llvm; @@ -46,20 +47,6 @@ void StringMapImpl::init(unsigned InitSize) { } -/// HashString - Compute a hash code for the specified string. -/// -static unsigned HashString(const char *Start, const char *End) { - // Bernstein hash function. - unsigned int Result = 0; - // TODO: investigate whether a modified bernstein hash function performs - // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx - // X*33+c -> X*33^c - while (Start != End) - Result = Result * 33 + *Start++; - Result = Result + (Result >> 5); - return Result; -} - /// LookupBucketFor - Look up the bucket that the specified string should end /// up in. If it already exists as a key in the map, the Item pointer for the /// specified bucket will be non-null. Otherwise, it will be null. In either @@ -71,7 +58,7 @@ unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) { init(16); HTSize = NumBuckets; } - unsigned FullHashValue = HashString(Name.begin(), Name.end()); + unsigned FullHashValue = HashString(Name); unsigned BucketNo = FullHashValue & (HTSize-1); unsigned ProbeAmt = 1; @@ -126,7 +113,7 @@ unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) { int StringMapImpl::FindKey(const StringRef &Key) const { unsigned HTSize = NumBuckets; if (HTSize == 0) return -1; // Really empty table? - unsigned FullHashValue = HashString(Key.begin(), Key.end()); + unsigned FullHashValue = HashString(Key); unsigned BucketNo = FullHashValue & (HTSize-1); unsigned ProbeAmt = 1; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 6f805da..26a1a4e 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -96,6 +96,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case OpenBSD: return "openbsd"; case Solaris: return "solaris"; case Win32: return "win32"; + case Haiku: return "haiku"; } return "<invalid>"; @@ -276,6 +277,8 @@ void Triple::Parse() const { OS = Solaris; else if (OSName.startswith("win32")) OS = Win32; + else if (OSName.startswith("haiku")) + OS = Haiku; else OS = UnknownOS; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 0a82cc1..31451cc 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -168,6 +168,40 @@ raw_ostream &raw_ostream::write_hex(unsigned long long N) { return write(CurPtr, EndPtr-CurPtr); } +raw_ostream &raw_ostream::write_escaped(StringRef Str) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + unsigned char c = Str[i]; + + switch (c) { + case '\\': + *this << '\\' << '\\'; + break; + case '\t': + *this << '\\' << 't'; + break; + case '\n': + *this << '\\' << 'n'; + break; + case '"': + *this << '\\' << '"'; + break; + default: + if (std::isprint(c)) { + *this << c; + break; + } + + // Always expand to a 3-character octal escape. + *this << '\\'; + *this << char('0' + ((c >> 6) & 7)); + *this << char('0' + ((c >> 3) & 7)); + *this << char('0' + ((c >> 0) & 7)); + } + } + + return *this; +} + raw_ostream &raw_ostream::operator<<(const void *P) { *this << '0' << 'x'; diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index 56dea25..c52f3a8 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -244,7 +244,7 @@ Program::Wait(unsigned secondsToWait, int status; uint64_t pid = reinterpret_cast<uint64_t>(Data_); pid_t child = static_cast<pid_t>(pid); - while (wait(&status) != child) + while (waitpid(pid, &status, 0) != child) if (secondsToWait && errno == EINTR) { // Kill the child. kill(child, SIGKILL); diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 1839153..c603708 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -15,7 +15,6 @@ #define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -38,7 +37,7 @@ namespace ARM_AM { static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { - default: llvm_unreachable("Unknown shift opc!"); + default: assert(0 && "Unknown shift opc!"); case ARM_AM::asr: return "asr"; case ARM_AM::lsl: return "lsl"; case ARM_AM::lsr: return "lsr"; @@ -71,7 +70,7 @@ namespace ARM_AM { static inline const char *getAMSubModeStr(AMSubMode Mode) { switch (Mode) { - default: llvm_unreachable("Unknown addressing sub-mode!"); + default: assert(0 && "Unknown addressing sub-mode!"); case ARM_AM::ia: return "ia"; case ARM_AM::ib: return "ib"; case ARM_AM::da: return "da"; @@ -81,7 +80,7 @@ namespace ARM_AM { static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { switch (Mode) { - default: llvm_unreachable("Unknown addressing sub-mode!"); + default: assert(0 && "Unknown addressing sub-mode!"); case ARM_AM::ia: return isLD ? "fd" : "ea"; case ARM_AM::ib: return isLD ? "ed" : "fa"; case ARM_AM::da: return isLD ? "fa" : "ed"; @@ -342,6 +341,66 @@ namespace ARM_AM { return -1; } + static inline unsigned getT2SOImmValRotate(unsigned V) { + if ((V & ~255U) == 0) return 0; + // Use CTZ to compute the rotate amount. + unsigned RotAmt = CountTrailingZeros_32(V); + return (32 - RotAmt) & 31; + } + + static inline bool isT2SOImmTwoPartVal (unsigned Imm) { + unsigned V = Imm; + // Passing values can be any combination of splat values and shifter + // values. If this can be handled with a single shifter or splat, bail + // out. Those should be handled directly, not with a two-part val. + if (getT2SOImmValSplatVal(V) != -1) + return false; + V = rotr32 (~255U, getT2SOImmValRotate(V)) & V; + if (V == 0) + return false; + + // If this can be handled as an immediate, accept. + if (getT2SOImmVal(V) != -1) return true; + + // Likewise, try masking out a splat value first. + V = Imm; + if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1) + V &= ~0xff00ff00U; + else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1) + V &= ~0x00ff00ffU; + // If what's left can be handled as an immediate, accept. + if (getT2SOImmVal(V) != -1) return true; + + // Otherwise, do not accept. + return false; + } + + static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) { + assert (isT2SOImmTwoPartVal(Imm) && + "Immedate cannot be encoded as two part immediate!"); + // Try a shifter operand as one part + unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm; + // If the rest is encodable as an immediate, then return it. + if (getT2SOImmVal(V) != -1) return V; + + // Try masking out a splat value first. + if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1) + return Imm & 0xff00ff00U; + + // The other splat is all that's left as an option. + assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1); + return Imm & 0x00ff00ffU; + } + + static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) { + // Mask out the first hunk + Imm ^= getT2SOImmTwoPartFirst(Imm); + // Return what's left + assert (getT2SOImmVal(Imm) != -1 && + "Unable to encode second part of T2 two part SO immediate"); + return Imm; + } + //===--------------------------------------------------------------------===// // Addressing Mode #2 diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 42ef183..00e7531 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -36,8 +36,18 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<bool> +ScavengeFrameIndexVals("arm-virtual-frame-index-vals", cl::Hidden, + cl::init(false), + cl::desc("Resolve frame index values via scavenging in PEI")); + +static cl::opt<bool> +ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(false), + cl::desc("Reuse repeated frame index values")); + unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { if (isSPVFP) @@ -740,8 +750,7 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, case ARM::R1: return ARM::R0; case ARM::R3: - // FIXME! - return STI.isThumb1Only() ? 0 : ARM::R2; + return ARM::R2; case ARM::R5: return ARM::R4; case ARM::R7: @@ -830,8 +839,7 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, case ARM::R0: return ARM::R1; case ARM::R2: - // FIXME! - return STI.isThumb1Only() ? 0 : ARM::R3; + return ARM::R3; case ARM::R4: return ARM::R5; case ARM::R6: @@ -937,6 +945,11 @@ requiresRegisterScavenging(const MachineFunction &MF) const { return true; } +bool ARMBaseRegisterInfo:: +requiresFrameIndexScavenging(const MachineFunction &MF) const { + return ScavengeFrameIndexVals; +} + // hasReservedCallFrame - Under normal circumstances, when a frame pointer is // not required, we reserve argument space for call sites in the function // immediately on entry to the current function. This eliminates the need for @@ -1077,14 +1090,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) && "This code isn't needed if offset already handled!"); - // Insert a set of r12 with the full address: r12 = sp + offset - // If the offset we have is too large to fit into the instruction, we need - // to form it with a series of ADDri's. Do this by taking 8-bit chunks - // out of 'Offset'. - unsigned ScratchReg = findScratchRegister(RS, ARM::GPRRegisterClass, AFI); - if (ScratchReg == 0) - // No register is "free". Scavenge a register. - ScratchReg = RS->scavengeRegister(ARM::GPRRegisterClass, II, SPAdj); + unsigned ScratchReg = 0; int PIdx = MI.findFirstPredOperandIdx(); ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); @@ -1093,6 +1099,19 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Must be addrmode4. MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { + if (!ScavengeFrameIndexVals) { + // Insert a set of r12 with the full address: r12 = sp + offset + // If the offset we have is too large to fit into the instruction, we need + // to form it with a series of ADDri's. Do this by taking 8-bit chunks + // out of 'Offset'. + ScratchReg = findScratchRegister(RS, ARM::GPRRegisterClass, AFI); + if (ScratchReg == 0) + // No register is "free". Scavenge a register. + ScratchReg = RS->scavengeRegister(ARM::GPRRegisterClass, II, SPAdj); + } else { + ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); + *Value = Offset; + } if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); @@ -1102,8 +1121,10 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + if (!ReuseFrameIndexVals || !ScavengeFrameIndexVals) + ScratchReg = 0; } - return 0; + return ScratchReg; } /// Move iterator pass the next bunch of callee save load / store ops for diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index da703fb..f7d38e5 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -122,6 +122,8 @@ public: virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; + virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + virtual bool hasReservedCallFrame(MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index b0bd04b..c995ff2 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -28,9 +28,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include <algorithm> using namespace llvm; STATISTIC(NumCPEs, "Number of constpool entries"); @@ -70,6 +72,10 @@ namespace { /// to a return, unreachable, or unconditional branch). std::vector<MachineBasicBlock*> WaterList; + /// NewWaterList - The subset of WaterList that was created since the + /// previous iteration by inserting unconditional branches. + SmallSet<MachineBasicBlock*, 4> NewWaterList; + typedef std::vector<MachineBasicBlock*>::iterator water_iterator; /// CPUser - One user of a constant pool, keeping the machine instruction @@ -175,9 +181,7 @@ namespace { void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta); bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); - bool LookForWater(CPUser&U, unsigned UserOffset, - MachineBasicBlock *&NewMBB); - MachineBasicBlock *AcceptWater(water_iterator IP); + bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex); @@ -297,6 +301,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (CPChange && ++NoCPIters > 30) llvm_unreachable("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); + + // Clear NewWaterList now. If we split a block for branches, it should + // appear as "new water" for the next iteration of constant pool placement. + NewWaterList.clear(); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) @@ -629,7 +637,7 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { /// Split the basic block containing MI into two blocks, which are joined by -/// an unconditional branch. Update datastructures and renumber blocks to +/// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); @@ -691,6 +699,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { WaterList.insert(next(IP), NewBB); else WaterList.insert(IP, OrigBB); + NewWaterList.insert(OrigBB); // Figure out how large the first NewMBB is. (It cannot // contain a constpool_entry or tablejump.) @@ -941,30 +950,16 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { return ((1<<23)-1)*4; } -/// AcceptWater - Small amount of common code factored out of the following. -/// -MachineBasicBlock *ARMConstantIslands::AcceptWater(water_iterator IP) { - DEBUG(errs() << "found water in range\n"); - MachineBasicBlock *WaterBB = *IP; - // Remove the original WaterList entry; we want subsequent - // insertions in this vicinity to go after the one we're - // about to insert. This considerably reduces the number - // of times we have to move the same CPE more than once. - WaterList.erase(IP); - // CPE goes before following block (NewMBB). - return next(MachineFunction::iterator(WaterBB)); -} - -/// LookForWater - look for an existing entry in the WaterList in which +/// LookForWater - Look for an existing entry in the WaterList in which /// we can place the CPE referenced from U so it's within range of U's MI. -/// Returns true if found, false if not. If it returns true, NewMBB +/// Returns true if found, false if not. If it returns true, WaterIter /// is set to the WaterList entry. For Thumb, prefer water that will not /// introduce padding to water that will. To ensure that this pass /// terminates, the CPE location for a particular CPUser is only allowed to /// move to a lower address, so search backward from the end of the list and /// prefer the first water that is in range. bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, - MachineBasicBlock *&NewMBB) { + water_iterator &WaterIter) { if (WaterList.empty()) return false; @@ -973,9 +968,17 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; --IP) { MachineBasicBlock* WaterBB = *IP; - // Check if water is in range and at a lower address than the current one. - if (WaterBB->getNumber() < U.HighWaterMark->getNumber() && - WaterIsInRange(UserOffset, WaterBB, U)) { + // Check if water is in range and is either at a lower address than the + // current "high water mark" or a new water block that was created since + // the previous iteration by inserting an unconditional branch. In the + // latter case, we want to allow resetting the high water mark back to + // this new water since we haven't seen it before. Inserting branches + // should be relatively uncommon and when it does happen, we want to be + // sure to take advantage of it for all the CPEs near that block, so that + // we don't insert more branches than necessary. + if (WaterIsInRange(UserOffset, WaterBB, U) && + (WaterBB->getNumber() < U.HighWaterMark->getNumber() || + NewWaterList.count(WaterBB))) { unsigned WBBId = WaterBB->getNumber(); if (isThumb && (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) { @@ -986,7 +989,7 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, IPThatWouldPad = IP; } } else { - NewMBB = AcceptWater(IP); + WaterIter = IP; return true; } } @@ -994,7 +997,7 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, break; } if (FoundWaterThatWouldPad) { - NewMBB = AcceptWater(IPThatWouldPad); + WaterIter = IPThatWouldPad; return true; } return false; @@ -1107,7 +1110,6 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); - MachineBasicBlock *NewMBB; // Compute this only once, it's expensive. The 4 or 8 is the value the // hardware keeps in the PC. unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); @@ -1123,14 +1125,50 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned ID = AFI->createConstPoolEntryUId(); // Look for water where we can place this CPE. - if (!LookForWater(U, UserOffset, NewMBB)) { + MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); + MachineBasicBlock *NewMBB; + water_iterator IP; + if (LookForWater(U, UserOffset, IP)) { + DEBUG(errs() << "found water in range\n"); + MachineBasicBlock *WaterBB = *IP; + + // If the original WaterList entry was "new water" on this iteration, + // propagate that to the new island. This is just keeping NewWaterList + // updated to match the WaterList, which will be updated below. + if (NewWaterList.count(WaterBB)) { + NewWaterList.erase(WaterBB); + NewWaterList.insert(NewIsland); + } + // The new CPE goes before the following block (NewMBB). + NewMBB = next(MachineFunction::iterator(WaterBB)); + + } else { // No water found. DEBUG(errs() << "No water found\n"); CreateNewWater(CPUserIndex, UserOffset, NewMBB); + + // SplitBlockBeforeInstr adds to WaterList, which is important when it is + // called while handling branches so that the water will be seen on the + // next iteration for constant pools, but in this context, we don't want + // it. Check for this so it will be removed from the WaterList. + // Also remove any entry from NewWaterList. + MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB)); + IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); + if (IP != WaterList.end()) + NewWaterList.erase(WaterBB); + + // We are adding new water. Update NewWaterList. + NewWaterList.insert(NewIsland); } + // Remove the original WaterList entry; we want subsequent insertions in + // this vicinity to go after the one we're about to insert. This + // considerably reduces the number of times we have to move the same CPE + // more than once and is also important to ensure the algorithm terminates. + if (IP != WaterList.end()) + WaterList.erase(IP); + // Okay, we know we can put an island before NewMBB now, do it! - MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); MF.insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c39de0a..5c1835b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1287,7 +1287,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); unsigned Width = 32 - Srl_imm; int LSB = Srl_imm - Shl_imm; - if ((LSB + Width) > 32) + if (LSB < 0) return NULL; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { Op.getOperand(0).getOperand(0), @@ -1427,6 +1427,43 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } } break; + case ISD::AND: { + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits + // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits + // are entirely contributed by c2 and lower 16-bits are entirely contributed + // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). + // Select it to: "movt x, ((c1 & 0xffff) >> 16) + EVT VT = Op.getValueType(); + if (VT != MVT::i32) + break; + unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) + ? ARM::t2MOVTi16 + : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); + if (!Opc) + break; + SDValue N0 = Op.getOperand(0), N1 = Op.getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (!N1C) + break; + if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { + SDValue N2 = N0.getOperand(1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + if (!N2C) + break; + unsigned N1CVal = N1C->getZExtValue(); + unsigned N2CVal = N2C->getZExtValue(); + if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && + (N1CVal & 0xffffU) == 0xffffU && + (N2CVal & 0xffffU) == 0x0U) { + SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, + MVT::i32); + SDValue Ops[] = { N0.getOperand(0), Imm16, + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4); + } + } + break; + } case ARMISD::FMRRD: return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, Op.getOperand(0), getAL(CurDAG), diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 426cecb..6a264fd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -25,9 +25,10 @@ #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/GlobalValue.h" #include "llvm/Instruction.h" #include "llvm/Intrinsics.h" -#include "llvm/GlobalValue.h" +#include "llvm/Type.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -2360,8 +2361,11 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); - unsigned NumElts = VT.getVectorNumElements(); unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) @@ -2378,6 +2382,10 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { @@ -2390,6 +2398,10 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i != NumElts; ++i) { @@ -2398,7 +2410,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, } // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. - if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + if (VT.is64BitVector() && EltSz == 32) return false; return true; @@ -2406,6 +2418,10 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; @@ -2417,7 +2433,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, } // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. - if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + if (VT.is64BitVector() && EltSz == 32) return false; return true; @@ -2695,18 +2711,10 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue Vec = Op.getOperand(0); SDValue Lane = Op.getOperand(1); - - // FIXME: This is invalid for 8 and 16-bit elements - the information about - // sign / zero extension is lost! - Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); - Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT)); - - if (VT.bitsLT(MVT::i32)) - Op = DAG.getNode(ISD::TRUNCATE, dl, VT, Op); - else if (VT.bitsGT(MVT::i32)) - Op = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op); - - return Op; + assert(VT == MVT::i32 && + Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && + "unexpected type for custom-lowering vector extract"); + return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { @@ -3029,7 +3037,6 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } - /// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. static SDValue PerformFMRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 3d19f23..8225fd7 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1220,6 +1220,10 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, string asm, string cstr, list<dag> pattern> : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, asm, cstr, pattern> { let Inst{31-24} = 0b11110100; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{7-4} = op7_4; } class NDataI<dag oops, dag iops, InstrItinClass itin, @@ -1258,15 +1262,26 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{4} = op4; } +// NEON Vector Duplicate (scalar). +// Inst{19-16} is specified by subclasses. +class N2VDup<bits<2> op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, itin, asm, cstr, pattern> { + let Inst{24-23} = op24_23; + let Inst{21-20} = op21_20; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; +} + // NEON 2 vector register with immediate. -class N2VImm<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, +class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, itin, asm, cstr, pattern> { let Inst{24} = op24; let Inst{23} = op23; - let Inst{21-16} = op21_16; let Inst{11-8} = op11_8; let Inst{7} = op7; let Inst{6} = op6; @@ -1286,6 +1301,20 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{4} = op4; } +// NEON 3 vector register with immediate. This is only used for VEXT where +// op11_8 represents the starting byte index of the extracted result in the +// concatenation of the operands and is left unspecified. +class N3VImm<bit op24, bit op23, bits<2> op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, itin, asm, cstr, pattern> { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{6} = op6; + let Inst{4} = op4; +} + // NEON VMOVs between scalar and core registers. class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, Format f, InstrItinClass itin, diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 4c92891..dd4123b 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/CommandLine.h" using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 6ec78bc..384b98c 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -980,6 +980,9 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), let Inst{25} = 1; } +def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, + Requires<[IsARM, HasV6T2]>; + let Uses = [CPSR] in def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, "mov", " $dst, $src, rrx", @@ -1580,10 +1583,17 @@ def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS), + (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS), + (SUBri (SUBri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. -// This is a single pseudo instruction to make it re-materializable. Remove -// when we can do generalized remat. +// This is a single pseudo instruction, the benefit is that it can be remat'd +// as a single unit instead of having to handle reg inputs. +// FIXME: Remove this when we can do generalized remat. let isReMaterializable = 1 in def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index b34aff7..822950c 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -456,17 +456,17 @@ class VST2LN<bits<4> op11_8, string OpcodeStr> !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"), "", []>; -def VST2LNd8 : VST2LN<0b0000, "vst2.8">; -def VST2LNd16 : VST2LN<0b0100, "vst2.16">; -def VST2LNd32 : VST2LN<0b1000, "vst2.32">; +def VST2LNd8 : VST2LN<0b0001, "vst2.8">; +def VST2LNd16 : VST2LN<0b0101, "vst2.16">; +def VST2LNd32 : VST2LN<0b1001, "vst2.32">; // vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0100, "vst2.16">; -def VST2LNq32a: VST2LN<0b1000, "vst2.32">; +def VST2LNq16a: VST2LN<0b0101, "vst2.16">; +def VST2LNq32a: VST2LN<0b1001, "vst2.32">; // vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0100, "vst2.16">; -def VST2LNq32b: VST2LN<0b1000, "vst2.32">; +def VST2LNq16b: VST2LN<0b0101, "vst2.16">; +def VST2LNq32b: VST2LN<0b1001, "vst2.32">; // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN<bits<4> op11_8, string OpcodeStr> @@ -623,12 +623,12 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>; -// Long 2-register intrinsics. (This is currently only used for VMOVL and is -// derived from N2VImm instead of N2V because of the way the size is encoded.) -class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, InstrItinClass itin, string OpcodeStr, +// Long 2-register intrinsics (currently only used for VMOVL). +class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, (outs QPR:$dst), + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst), (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>; @@ -1016,36 +1016,33 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. -class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, InstrItinClass itin, string OpcodeStr, - ValueType Ty, SDNode OpNode> - : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, +class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>; -class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, InstrItinClass itin, string OpcodeStr, - ValueType Ty, SDNode OpNode> - : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, +class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>; // Long shift by immediate. -class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, ValueType ResTy, - ValueType OpTy, SDNode OpNode> - : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, +class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2VImm<op24, op23, op11_8, op7, op6, op4, (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>; // Narrow shift by immediate. -class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, InstrItinClass itin, string OpcodeStr, +class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, + : N2VImm<op24, op23, op11_8, op7, op6, op4, (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src), @@ -1053,53 +1050,49 @@ class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, // Shift right by immediate and accumulate, // both double- and quad-register. -class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), - IIC_VPALiD, +class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), + (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VPALiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set DPR:$dst, (Ty (add DPR:$src1, (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>; -class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), - IIC_VPALiD, +class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), + (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VPALiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set QPR:$dst, (Ty (add QPR:$src1, (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>; // Shift by immediate and insert, // both double- and quad-register. -class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), - IIC_VSHLiD, +class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), + (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>; -class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), - IIC_VSHLiQ, +class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), + (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VSHLiQ, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>; // Convert, with fractional bits immediate, // both double- and quad-register. -class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, +class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>; -class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, +class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>; @@ -1175,14 +1168,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). // source operand element sizes of 16, 32 and 64 bits: -multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, - bit op4, string OpcodeStr, Intrinsic IntOp> { - def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4, - IIC_VQUNAiD, !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; - def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4, - IIC_VQUNAiD, !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4, - IIC_VQUNAiD, !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; +multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, Intrinsic IntOp> { + def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; + def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; + def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; } @@ -1381,7 +1374,7 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, Intrinsic IntOp> : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> { - def v8i16 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; } @@ -1461,24 +1454,38 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, SDNode OpNode> { // 64-bit vector types. - def v8i8 : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "8"), v8i8, OpNode>; - def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "16"), v4i16, OpNode>; - def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "32"), v2i32, OpNode>; - def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, itin, + def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "8"), v8i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "16"), v4i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "32"), v2i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin, !strconcat(OpcodeStr, "64"), v1i64, OpNode>; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "8"), v16i8, OpNode>; - def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "16"), v8i16, OpNode>; - def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "32"), v4i32, OpNode>; - def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, itin, + def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "8"), v16i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "16"), v8i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "32"), v4i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin, !strconcat(OpcodeStr, "64"), v2i64, OpNode>; + // imm6 = xxxxxx } @@ -1487,24 +1494,38 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4, - !strconcat(OpcodeStr, "8"), v8i8, ShOp>; - def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4, - !strconcat(OpcodeStr, "16"), v4i16, ShOp>; - def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4, - !strconcat(OpcodeStr, "32"), v2i32, ShOp>; - def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4, + def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v8i8, ShOp> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v4i16, ShOp> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v2i32, ShOp> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, !strconcat(OpcodeStr, "64"), v1i64, ShOp>; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4, - !strconcat(OpcodeStr, "8"), v16i8, ShOp>; - def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4, - !strconcat(OpcodeStr, "16"), v8i16, ShOp>; - def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4, - !strconcat(OpcodeStr, "32"), v4i32, ShOp>; - def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4, + def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v16i8, ShOp> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v8i16, ShOp> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v4i32, ShOp> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, !strconcat(OpcodeStr, "64"), v2i64, ShOp>; + // imm6 = xxxxxx } @@ -1513,24 +1534,75 @@ multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4, - !strconcat(OpcodeStr, "8"), v8i8, ShOp>; - def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4, - !strconcat(OpcodeStr, "16"), v4i16, ShOp>; - def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4, - !strconcat(OpcodeStr, "32"), v2i32, ShOp>; - def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4, + def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v8i8, ShOp> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v4i16, ShOp> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v2i32, ShOp> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, !strconcat(OpcodeStr, "64"), v1i64, ShOp>; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4, - !strconcat(OpcodeStr, "8"), v16i8, ShOp>; - def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4, - !strconcat(OpcodeStr, "16"), v8i16, ShOp>; - def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4, - !strconcat(OpcodeStr, "32"), v4i32, ShOp>; - def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4, + def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v16i8, ShOp> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v8i16, ShOp> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v4i32, ShOp> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, !strconcat(OpcodeStr, "64"), v2i64, ShOp>; + // imm6 = xxxxxx +} + +// Neon Shift Long operations, +// element sizes of 8, 16, 32 bits: +multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, + bit op4, string OpcodeStr, SDNode OpNode> { + def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, + !strconcat(OpcodeStr, "8"), v8i16, v8i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, + !strconcat(OpcodeStr, "16"), v4i32, v4i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, + !strconcat(OpcodeStr, "32"), v2i64, v2i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } +} + +// Neon Shift Narrow operations, +// element sizes of 16, 32, 64 bits: +multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + SDNode OpNode> { + def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, + !strconcat(OpcodeStr, "16"), v8i8, v8i16, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, + !strconcat(OpcodeStr, "32"), v4i16, v4i32, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, + !strconcat(OpcodeStr, "64"), v2i32, v2i64, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } } //===----------------------------------------------------------------------===// @@ -1903,8 +1975,8 @@ defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_v defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,1,0b0101,0, "vaba.s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,1,0b0101,0, "vaba.u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>; @@ -2044,34 +2116,25 @@ defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; // VSHLL : Vector Shift Left Long -def VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8", - v8i16, v8i8, NEONvshlls>; -def VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16", - v4i32, v4i16, NEONvshlls>; -def VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32", - v2i64, v2i32, NEONvshlls>; -def VSHLLu8 : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8", - v8i16, v8i8, NEONvshllu>; -def VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16", - v4i32, v4i16, NEONvshllu>; -def VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32", - v2i64, v2i32, NEONvshllu>; +defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll.s", NEONvshlls>; +defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll.u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) -def VSHLLi8 : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", - v8i16, v8i8, NEONvshlli>; -def VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", - v4i32, v4i16, NEONvshlli>; -def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", - v2i64, v2i32, NEONvshlli>; +class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op6, bit op4, string OpcodeStr, ValueType ResTy, + ValueType OpTy, SDNode OpNode> + : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, ResTy, OpTy, OpNode> { + let Inst{21-16} = op21_16; +} +def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", + v8i16, v8i8, NEONvshlli>; +def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", + v4i32, v4i16, NEONvshlli>; +def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", + v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i16", v8i8, v8i16, NEONvshrn>; -def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i32", v4i16, v4i32, NEONvshrn>; -def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i64", v2i32, v2i64, NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn.i", NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2083,12 +2146,8 @@ defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i16", v8i8, v8i16, NEONvrshrn>; -def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i32", v4i16, v4i32, NEONvrshrn>; -def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i64", v2i32, v2i64, NEONvrshrn>; +defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn.i", + NEONvrshrn>; // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2102,26 +2161,14 @@ defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s16", v8i8, v8i16, NEONvqshrns>; -def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s32", v4i16, v4i32, NEONvqshrns>; -def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s64", v2i32, v2i64, NEONvqshrns>; -def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>; -def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>; -def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>; +defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.s", + NEONvqshrns>; +defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.u", + NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>; -def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>; -def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>; +defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun.s", + NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2130,26 +2177,14 @@ defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>; -def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>; -def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>; -def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u16", v8i8, v8i16, NEONvqrshrnu>; -def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>; -def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>; +defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.s", + NEONvqrshrns>; +defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.u", + NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>; -def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>; -def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>; +defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun.s", + NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; @@ -2491,27 +2526,28 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), // VDUP : Vector Duplicate Lane (from scalar to all elements) -class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, +class VDUPLND<string OpcodeStr, ValueType Ty> + : N2VDup<0b11, 0b11, 0b11000, 0, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; -class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, - ValueType ResTy, ValueType OpTy> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, +class VDUPLNQ<string OpcodeStr, ValueType ResTy, ValueType OpTy> + : N2VDup<0b11, 0b11, 0b11000, 1, 0, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; -def VDUPLN8d : VDUPLND<0b00, 0b01, "vdup.8", v8i8>; -def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>; -def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>; -def VDUPLNfd : VDUPLND<0b01, 0b00, "vdup.32", v2f32>; -def VDUPLN8q : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>; -def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>; -def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>; -def VDUPLNfq : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>; +// Inst{19-16} is partially specified depending on the element size. + +def VDUPLN8d : VDUPLND<"vdup.8", v8i8> { let Inst{16} = 1; } +def VDUPLN16d : VDUPLND<"vdup.16", v4i16> { let Inst{17-16} = 0b10; } +def VDUPLN32d : VDUPLND<"vdup.32", v2i32> { let Inst{18-16} = 0b100; } +def VDUPLNfd : VDUPLND<"vdup.32", v2f32> { let Inst{18-16} = 0b100; } +def VDUPLN8q : VDUPLNQ<"vdup.8", v16i8, v8i8> { let Inst{16} = 1; } +def VDUPLN16q : VDUPLNQ<"vdup.16", v8i16, v4i16> { let Inst{17-16} = 0b10; } +def VDUPLN32q : VDUPLNQ<"vdup.32", v4i32, v2i32> { let Inst{18-16} = 0b100; } +def VDUPLNfq : VDUPLNQ<"vdup.32", v4f32, v2f32> { let Inst{18-16} = 0b100; } def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -2530,15 +2566,19 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0, - (outs DPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; +def VDUPfdf : N2VDup<0b11, 0b11, 0b11000, 0, 0, + (outs DPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", + [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]> { + let Inst{18-16} = 0b100; +} -def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0, - (outs QPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; +def VDUPfqf : N2VDup<0b11, 0b11, 0b11000, 1, 0, + (outs QPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", + [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]> { + let Inst{18-16} = 0b100; +} def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, @@ -2560,8 +2600,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u", defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s", int_arm_neon_vqmovnsu>; // VMOVL : Vector Lengthening Move -defm VMOVLs : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>; -defm VMOVLu : N2VLInt_QHS<1,1,0b1010,0,0,1, "vmovl.u", int_arm_neon_vmovlu>; +defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl.s", int_arm_neon_vmovls>; +defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl.u", int_arm_neon_vmovlu>; // Vector Conversions. @@ -2585,24 +2625,22 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", v4f32, v4i32, uint_to_fp>; // VCVT : Vector Convert Between Floating-Point and Fixed-Point. -// Note: Some of the opcode bits in the following VCVT instructions need to -// be encoded based on the immed values. -def VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; -def VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; // Vector Reverse. @@ -2670,18 +2708,18 @@ def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>; // VEXT : Vector Extract class VEXTd<string OpcodeStr, ValueType Ty> - : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst), - (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, - !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", - [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), - (Ty DPR:$rhs), imm:$index)))]>; + : N3VImm<0,1,0b11,0,0, (outs DPR:$dst), + (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, + !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", + [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), + (Ty DPR:$rhs), imm:$index)))]>; class VEXTq<string OpcodeStr, ValueType Ty> - : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst), - (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, - !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", - [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), - (Ty QPR:$rhs), imm:$index)))]>; + : N3VImm<0,1,0b11,1,0, (outs QPR:$dst), + (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, + !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", + [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), + (Ty QPR:$rhs), imm:$index)))]>; def VEXTd8 : VEXTd<"vext.8", v8i8>; def VEXTd16 : VEXTd<"vext.16", v4i16>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0750dcc..2b6fa98 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -69,6 +69,25 @@ def t2_so_imm_neg : Operand<i32>, return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; +// Break t2_so_imm's up into two pieces. This handles immediates with up to 16 +// bits set in them. This uses t2_so_imm2part to match and t2_so_imm2part_[12] +// to get the first/second pieces. +def t2_so_imm2part : Operand<i32>, + PatLeaf<(imm), [{ + return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue()); + }]> { +} + +def t2_so_imm2part_1 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getT2SOImmTwoPartFirst((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def t2_so_imm2part_2 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getT2SOImmTwoPartSecond((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. def imm1_31 : PatLeaf<(i32 imm), [{ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32; @@ -666,6 +685,8 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, [(set GPR:$dst, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>; +def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>; + //===----------------------------------------------------------------------===// // Extend Instructions. // @@ -1129,6 +1150,20 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Non-Instruction Patterns // +// Two piece so_imms. +def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS), + (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS), + (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS), + (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(sub GPR:$LHS, t2_so_imm2part:$RHS), + (t2SUBri (t2SUBri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; + // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>; def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index d2ec9ee..c9b9e84 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -974,6 +974,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (Advance) { ++Position; ++MBBI; + if (MBBI == E) + // Reach the end of the block, try merging the memory instructions. + TryMerge = true; } else TryMerge = true; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 20a7355..e0be784 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -222,12 +222,9 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; - // FIXME: We are reserving r3 in Thumb mode in case the PEI needs to use it - // to generate large stack offset. Make it available once we have register - // scavenging. let MethodBodies = [{ static const unsigned THUMB_tGPR_AO[] = { - ARM::R0, ARM::R1, ARM::R2, + ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; // FP is R7, only low registers available. diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index cf1ee3f..5af95c3 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -27,11 +27,11 @@ UseNEONFP("arm-use-neon-fp", cl::init(false), cl::Hidden); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, - bool isThumb) + bool isT) : ARMArchVersion(V4T) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(UseNEONFP) - , IsThumb(isThumb) + , IsThumb(isT) , ThumbMode(Thumb1) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) @@ -98,9 +98,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (isTargetDarwin()) IsR9Reserved = ReserveR9 | (ARMArchVersion < V6); + if (!isThumb() || hasThumb2()) + PostRAScheduler = true; + // Set CPU specific features. if (CPUString == "cortex-a8") { - PostRAScheduler = true; + // On Cortex-a8, it's faster to perform some single-precision FP + // operations with NEON instructions. if (UseNEONFP.getPosition() == 0) UseNEONForSinglePrecisionFP = true; } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 7098fd4..7478159 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -126,9 +126,13 @@ protected: const std::string & getCPUString() const { return CPUString; } - /// enablePostRAScheduler - From TargetSubtarget, return true to - /// enable post-RA scheduler. - bool enablePostRAScheduler() const { return PostRAScheduler; } + /// enablePostRAScheduler - True at 'More' optimization except + /// for Thumb1. + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& mode) const { + mode = TargetSubtarget::ANTIDEP_NONE; + return PostRAScheduler && OptLevel >= CodeGenOpt::Default; + } /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 32ddc20..c1da6ce 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -16,7 +16,6 @@ #include "ARM.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 7438ea9..403f96c 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -56,6 +56,14 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveThumb(SMLoc L); + + bool ParseDirectiveThumbFunc(SMLoc L); + + bool ParseDirectiveCode(SMLoc L); + + bool ParseDirectiveSyntax(SMLoc L); + // TODO - For now hacked versions of the next two are in here in this file to // allow some parser testing until the table gen versions are implemented. @@ -230,8 +238,8 @@ bool ARMAsmParser::ParseRegister(ARMOperand &Op) { return false; } -// Try to parse a register list. The first token must be a '{' when called -// for now. +// Parse a register list, return false if successful else return true or an +// error. The first token must be a '{' when called. bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { assert(getLexer().getTok().is(AsmToken::LCurly) && "Token is not an Left Curly Brace"); @@ -277,7 +285,8 @@ bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { return false; } -// Try to parse an arm memory expression. It must start with a '[' token. +// Parse an arm memory expression, return false if successful else return true +// or an error. The first token must be a '[' when called. // TODO Only preindexing and postindexing addressing are started, unindexed // with option, etc are still to do. bool ARMAsmParser::ParseMemory(ARMOperand &Op) { @@ -374,50 +383,55 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) { Writeback = true; getLexer().Lex(); // Eat right bracket token. - const AsmToken &CommaTok = getLexer().getTok(); - if (CommaTok.isNot(AsmToken::Comma)) - return Error(CommaTok.getLoc(), "',' expected"); - getLexer().Lex(); // Eat comma token. - - const AsmToken &NextTok = getLexer().getTok(); - if (NextTok.is(AsmToken::Plus)) - getLexer().Lex(); // Eat plus token. - else if (NextTok.is(AsmToken::Minus)) { - Negative = true; - getLexer().Lex(); // Eat minus token - } - - // See if there is a register following the "[Rn]," we have so far. - const AsmToken &OffsetRegTok = getLexer().getTok(); - int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + int OffsetRegNum = 0; bool OffsetRegShifted = false; enum ShiftType ShiftType; const MCExpr *ShiftAmount; const MCExpr *Offset; - if (OffsetRegNum != -1) { - OffsetIsReg = true; - getLexer().Lex(); // Eat identifier token for the offset register. - // Look for a comma then a shift - const AsmToken &Tok = getLexer().getTok(); - if (Tok.is(AsmToken::Comma)) { - getLexer().Lex(); // Eat comma token. - const AsmToken &Tok = getLexer().getTok(); - if (ParseShift(&ShiftType, ShiftAmount)) - return Error(Tok.getLoc(), "shift expected"); - OffsetRegShifted = true; + const AsmToken &NextTok = getLexer().getTok(); + if (NextTok.isNot(AsmToken::EndOfStatement)) { + if (NextTok.isNot(AsmToken::Comma)) + return Error(NextTok.getLoc(), "',' expected"); + getLexer().Lex(); // Eat comma token. + + const AsmToken &PlusMinusTok = getLexer().getTok(); + if (PlusMinusTok.is(AsmToken::Plus)) + getLexer().Lex(); // Eat plus token. + else if (PlusMinusTok.is(AsmToken::Minus)) { + Negative = true; + getLexer().Lex(); // Eat minus token } - } - else { // "[Rn]," we have so far was not followed by "Rm" - // Look for #offset following the "[Rn]," - const AsmToken &HashTok = getLexer().getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - getLexer().Lex(); // Eat hash token. - if (getParser().ParseExpression(Offset)) - return true; + // See if there is a register following the "[Rn]," we have so far. + const AsmToken &OffsetRegTok = getLexer().getTok(); + OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + if (OffsetRegNum != -1) { + OffsetIsReg = true; + getLexer().Lex(); // Eat identifier token for the offset register. + // Look for a comma then a shift + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + getLexer().Lex(); // Eat comma token. + + const AsmToken &Tok = getLexer().getTok(); + if (ParseShift(&ShiftType, ShiftAmount)) + return Error(Tok.getLoc(), "shift expected"); + OffsetRegShifted = true; + } + } + else { // "[Rn]," we have so far was not followed by "Rm" + // Look for #offset following the "[Rn]," + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. + + if (getParser().ParseExpression(Offset)) + return true; + } } + Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, OffsetRegShifted, ShiftType, ShiftAmount, Preindexed, Postindexed, Negative, Writeback); @@ -465,7 +479,7 @@ bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *&ShiftAmount) { return false; } -// A hack to allow some testing +// A hack to allow some testing, to be replaced by a real table gen version. int ARMAsmParser::MatchRegisterName(const StringRef &Name) { if (Name == "r0" || Name == "R0") return 0; @@ -504,7 +518,7 @@ int ARMAsmParser::MatchRegisterName(const StringRef &Name) { return -1; } -// A hack to allow some testing +// A hack to allow some testing, to be replaced by a real table gen version. bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, MCInst &Inst) { struct ARMOperand Op0 = Operands[0]; @@ -516,40 +530,58 @@ bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, Mnemonic == "ldmfd" || Mnemonic == "ldr" || Mnemonic == "mov" || - Mnemonic == "sub") + Mnemonic == "sub" || + Mnemonic == "bl" || + Mnemonic == "push" || + Mnemonic == "blx" || + Mnemonic == "pop") { + // Hard-coded to a valid instruction, till we have a real matcher. + Inst = MCInst(); + Inst.setOpcode(ARM::MOVr); + Inst.addOperand(MCOperand::CreateReg(2)); + Inst.addOperand(MCOperand::CreateReg(2)); + Inst.addOperand(MCOperand::CreateImm(0)); + Inst.addOperand(MCOperand::CreateImm(0)); + Inst.addOperand(MCOperand::CreateReg(0)); return false; + } return true; } -// TODO - this is a work in progress +// Parse a arm instruction operand. For now this parses the operand regardless +// of the mnemonic. bool ARMAsmParser::ParseOperand(ARMOperand &Op) { switch (getLexer().getKind()) { case AsmToken::Identifier: if (!ParseRegister(Op)) return false; - // TODO parse other operands that start with an identifier like labels - return Error(getLexer().getTok().getLoc(), "labels not yet supported"); + // This was not a register so parse other operands that start with an + // identifier (like labels) as expressions and create them as immediates. + const MCExpr *IdVal; + if (getParser().ParseExpression(IdVal)) + return true; + Op = ARMOperand::CreateImm(IdVal); + return false; case AsmToken::LBrac: - if (!ParseMemory(Op)) - return false; + return ParseMemory(Op); case AsmToken::LCurly: - if (!ParseRegisterList(Op)) - return false; + return ParseRegisterList(Op); case AsmToken::Hash: // #42 -> immediate. // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate getLexer().Lex(); - const MCExpr *Val; - if (getParser().ParseExpression(Val)) + const MCExpr *ImmVal; + if (getParser().ParseExpression(ImmVal)) return true; - Op = ARMOperand::CreateImm(Val); + Op = ARMOperand::CreateImm(ImmVal); return false; default: return Error(getLexer().getTok().getLoc(), "unexpected token in operand"); } } +// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { SmallVector<ARMOperand, 7> Operands; @@ -579,10 +611,19 @@ bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { return true; } +/// ParseDirective parses the arm specific directives bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") return ParseDirectiveWord(4, DirectiveID.getLoc()); + else if (IDVal == ".thumb") + return ParseDirectiveThumb(DirectiveID.getLoc()); + else if (IDVal == ".thumb_func") + return ParseDirectiveThumbFunc(DirectiveID.getLoc()); + else if (IDVal == ".code") + return ParseDirectiveCode(DirectiveID.getLoc()); + else if (IDVal == ".syntax") + return ParseDirectiveSyntax(DirectiveID.getLoc()); return true; } @@ -611,6 +652,93 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +/// ParseDirectiveThumb +/// ::= .thumb +bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + getLexer().Lex(); + + // TODO: set thumb mode + // TODO: tell the MC streamer the mode + // getParser().getStreamer().Emit???(); + return false; +} + +/// ParseDirectiveThumbFunc +/// ::= .thumbfunc symbol_name +bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) + return Error(L, "unexpected token in .syntax directive"); + StringRef SymbolName = getLexer().getTok().getIdentifier(); + getLexer().Lex(); // Consume the identifier token. + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + getLexer().Lex(); + + // TODO: mark symbol as a thumb symbol + // getParser().getStreamer().Emit???(); + return false; +} + +/// ParseDirectiveSyntax +/// ::= .syntax unified | divided +bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return Error(L, "unexpected token in .syntax directive"); + const StringRef &Mode = Tok.getString(); + bool unified_syntax; + if (Mode == "unified" || Mode == "UNIFIED") { + getLexer().Lex(); + unified_syntax = true; + } + else if (Mode == "divided" || Mode == "DIVIDED") { + getLexer().Lex(); + unified_syntax = false; + } + else + return Error(L, "unrecognized syntax mode in .syntax directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(getLexer().getTok().getLoc(), "unexpected token in directive"); + getLexer().Lex(); + + // TODO tell the MC streamer the mode + // getParser().getStreamer().Emit???(); + return false; +} + +/// ParseDirectiveCode +/// ::= .code 16 | 32 +bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Integer)) + return Error(L, "unexpected token in .code directive"); + int64_t Val = getLexer().getTok().getIntVal(); + bool thumb_mode; + if (Val == 16) { + getLexer().Lex(); + thumb_mode = true; + } + else if (Val == 32) { + getLexer().Lex(); + thumb_mode = false; + } + else + return Error(L, "invalid operand to .code directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(getLexer().getTok().getLoc(), "unexpected token in directive"); + getLexer().Lex(); + + // TODO tell the MC streamer the mode + // getParser().getStreamer().Emit???(); + return false; +} + // Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> X(TheARMTarget); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 546731b..8719e4c 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -1,3 +1,5 @@ +//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -13,21 +15,25 @@ #define DEBUG_TYPE "asm-printer" #include "ARM.h" #include "ARMBuildAttrs.h" -#include "ARMTargetMachine.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" +#include "ARMInstPrinter.h" #include "ARMMachineFunctionInfo.h" +#include "ARMMCInstLower.h" +#include "ARMTargetMachine.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -38,18 +44,22 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/FormattedStream.h" #include <cctype> using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); +static cl::opt<bool> +EnableMCInst("enable-arm-mcinst-printer", cl::Hidden, + cl::desc("enable experimental asmprinter gunk in the arm backend")); + namespace { - class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter { + class ARMAsmPrinter : public AsmPrinter { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when printing asm code for different targets. @@ -63,34 +73,23 @@ namespace { /// MachineFunction. const MachineConstantPool *MCP; - /// We name each basic block in a Function with a unique number, so - /// that we can consistently refer to them later. This is cleared - /// at the beginning of each call to runOnMachineFunction(). - /// - typedef std::map<const Value *, unsigned> ValueMapTy; - ValueMapTy NumberForBB; - - /// GVNonLazyPtrs - Keeps the set of GlobalValues that require - /// non-lazy-pointers for indirect access. - StringMap<std::string> GVNonLazyPtrs; - - /// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden - /// visibility that require non-lazy-pointers for indirect access. - StringMap<std::string> HiddenGVNonLazyPtrs; - - /// True if asm printer is printing a series of CONSTPOOL_ENTRY. - bool InCPMode; public: explicit ARMAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), AFI(NULL), MCP(NULL), - InCPMode(false) { + : AsmPrinter(O, TM, T, V), AFI(NULL), MCP(NULL) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } virtual const char *getPassName() const { return "ARM Assembly Printer"; } + + void printMCInst(const MCInst *MI) { + ARMInstPrinter(O, *MAI, VerboseAsm).printInstruction(MI); + } + + void printInstructionThroughMCStreamer(const MachineInstr *MI); + void printOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); @@ -149,8 +148,8 @@ namespace { void printMachineInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); - bool doFinalization(Module &M); void EmitStartOfAsmFile(Module &M); + void EmitEndOfAsmFile(Module &M); /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. @@ -173,12 +172,19 @@ namespace { Name = Mang->getMangledName(GV); else { // FIXME: Remove this when Darwin transition to @GOT like syntax. - std::string SymName = Mang->getMangledName(GV); Name = Mang->getMangledName(GV, "$non_lazy_ptr", true); - if (GV->hasHiddenVisibility()) - HiddenGVNonLazyPtrs[SymName] = Name; - else - GVNonLazyPtrs[SymName] = Name; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name)); + + MachineModuleInfoMachO &MMIMachO = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + const MCSymbol *&StubSym = + GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) : + MMIMachO.getGVStubEntry(Sym); + if (StubSym == 0) { + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + } } } else Name = Mang->makeNameProper(ACPV->getSymbol()); @@ -260,7 +266,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (Subtarget->isTargetDarwin()) O << "\t" << CurrentFnName; O << "\n"; - InCPMode = false; } else { EmitAlignment(FnAlign, F); } @@ -283,14 +288,13 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // Print a label for the basic block. - if (I != MF.begin()) { + if (I != MF.begin()) EmitBasicBlockStart(I); - } + + // Print the assembly for the instruction. for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. + II != E; ++II) printMachineInstruction(II); - } } if (MAI->hasDotTypeDotSizeDirective()) @@ -306,25 +310,25 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const char *Modifier) { const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { + default: + assert(0 && "<unknown operand type>"); case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (Modifier && strcmp(Modifier, "dregpair") == 0) { - unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 - unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 - O << '{' - << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) - << '}'; - } else if (Modifier && strcmp(Modifier, "lane") == 0) { - unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); - unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, - &ARM::DPR_VFP2RegClass); - O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; - } else { - O << getRegisterName(Reg); - } - } else - llvm_unreachable("not implemented"); + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (Modifier && strcmp(Modifier, "dregpair") == 0) { + unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 + unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 + O << '{' + << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) + << '}'; + } else if (Modifier && strcmp(Modifier, "lane") == 0) { + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); + unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, + &ARM::DPR_VFP2RegClass); + O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; + } else { + O << getRegisterName(Reg); + } break; } case MachineOperand::MO_Immediate: { @@ -372,8 +376,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); break; - default: - O << "<unknown operand type>"; abort (); break; } } @@ -1027,22 +1029,19 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - int Opc = MI->getOpcode(); - switch (Opc) { - case ARM::CONSTPOOL_ENTRY: - if (!InCPMode && AFI->isThumbFunction()) { - EmitAlignment(2); - InCPMode = true; - } - break; - default: { - if (InCPMode && AFI->isThumbFunction()) - InCPMode = false; - }} - // Call the autogenerated instruction printer routines. processDebugLoc(MI, true); - printInstruction(MI); + + if (EnableMCInst) { + printInstructionThroughMCStreamer(MI); + } else { + int Opc = MI->getOpcode(); + if (Opc == ARM::CONSTPOOL_ENTRY) + EmitAlignment(2); + + printInstruction(MI); + } + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; @@ -1256,34 +1255,40 @@ void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { } -bool ARMAsmPrinter::doFinalization(Module &M) { +void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. TargetLoweringObjectFileMachO &TLOFMacho = static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + MachineModuleInfoMachO &MMIMacho = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); O << '\n'; // Output non-lazy-pointers for external and common global variables. - if (!GVNonLazyPtrs.empty()) { + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList(); + + if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); EmitAlignment(2); - for (StringMap<std::string>::iterator I = GVNonLazyPtrs.begin(), - E = GVNonLazyPtrs.end(); I != E; ++I) { - O << I->second << ":\n"; - O << "\t.indirect_symbol " << I->getKeyData() << "\n"; - O << "\t.long\t0\n"; + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n\t.indirect_symbol "; + Stubs[i].second->print(O, MAI); + O << "\n\t.long\t0\n"; } } - if (!HiddenGVNonLazyPtrs.empty()) { + Stubs = MMIMacho.GetHiddenGVStubList(); + if (!Stubs.empty()) { OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); EmitAlignment(2); - for (StringMap<std::string>::iterator I = HiddenGVNonLazyPtrs.begin(), - E = HiddenGVNonLazyPtrs.end(); I != E; ++I) { - O << I->second << ":\n"; - O << "\t.long " << I->getKeyData() << "\n"; + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n\t.long "; + Stubs[i].second->print(O, MAI); + O << "\n"; } } @@ -1292,14 +1297,179 @@ bool ARMAsmPrinter::doFinalization(Module &M) { // implementation of multiple entry points). If this doesn't occur, the // linker can safely perform dead code stripping. Since LLVM never // generates code that does this, it is always safe to set. - O << "\t.subsections_via_symbols\n"; + OutStreamer.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols); + } +} + +//===----------------------------------------------------------------------===// + +void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { + ARMMCInstLower MCInstLowering(OutContext, *Mang, *this); + switch (MI->getOpcode()) { + case ARM::t2MOVi32imm: + assert(0 && "Should be lowered by thumb2it pass"); + default: break; + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + case TargetInstrInfo::GC_LABEL: + printLabel(MI); + return; + case TargetInstrInfo::KILL: + return; + case TargetInstrInfo::INLINEASM: + O << '\t'; + printInlineAsm(MI); + return; + case TargetInstrInfo::IMPLICIT_DEF: + printImplicitDef(MI); + return; + case ARM::PICADD: { // FIXME: Remove asm string from td file. + // This is a pseudo op for a label + instruction sequence, which looks like: + // LPC0: + // add r0, pc, r0 + // This adds the address of LPC0 to r0. + + // Emit the label. + // FIXME: MOVE TO SHARED PLACE. + unsigned Id = (unsigned)MI->getOperand(2).getImm(); + const char *Prefix = MAI->getPrivateGlobalPrefix(); + MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)+"PC"+Twine(Id)); + OutStreamer.EmitLabel(Label); + + + // Form and emit tha dd. + MCInst AddInst; + AddInst.setOpcode(ARM::ADDrr); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + printMCInst(&AddInst); + return; + } + case ARM::CONSTPOOL_ENTRY: { // FIXME: Remove asm string from td file. + /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool + /// in the function. The first operand is the ID# for this instruction, the + /// second is the index into the MachineConstantPool that this is, the third + /// is the size in bytes of this constant pool entry. + unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); + unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); + + EmitAlignment(2); + + const char *Prefix = MAI->getPrivateGlobalPrefix(); + MCSymbol *Label = OutContext.GetOrCreateSymbol(Twine(Prefix)+"CPI"+ + Twine(getFunctionNumber())+ + "_"+ Twine(LabelId)); + OutStreamer.EmitLabel(Label); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; + if (MCPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + else + EmitGlobalConstant(MCPE.Val.ConstVal); + + return; } + case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file. + // This is a hack that lowers as a two instruction sequence. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + + unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); + unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVi); + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); + TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1)); + + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + + TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out + printMCInst(&TmpInst); + O << '\n'; + } - return AsmPrinter::doFinalization(M); + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::ORRri); + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // inreg + TmpInst.addOperand(MCOperand::CreateImm(SOImmValV2)); // so_imm + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + + TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out + printMCInst(&TmpInst); + } + return; + } + case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. + // This is a hack that lowers as a two instruction sequence. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVi16); + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg + TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm) + + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + + printMCInst(&TmpInst); + O << '\n'; + } + + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVTi16); + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg + TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16)); // upper16(imm) + + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + + printMCInst(&TmpInst); + } + + return; + } + } + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + printMCInst(&TmpInst); +} + +//===----------------------------------------------------------------------===// +// Target Registry Stuff +//===----------------------------------------------------------------------===// + +static MCInstPrinter *createARMMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + raw_ostream &O) { + if (SyntaxVariant == 0) + return new ARMInstPrinter(O, MAI, false); + return 0; } // Force static initialization. extern "C" void LLVMInitializeARMAsmPrinter() { RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget); RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget); + + TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); } + diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp new file mode 100644 index 0000000..f422798 --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -0,0 +1,358 @@ +//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an ARM MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "ARM.h" // FIXME: FACTOR ENUMS BETTER. +#include "ARMInstPrinter.h" +#include "ARMAddressingModes.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Include the auto-generated portion of the assembly writer. +#define MachineInstr MCInst +#define ARMAsmPrinter ARMInstPrinter // FIXME: REMOVE. +#define NO_ASM_WRITER_BOILERPLATE +#include "ARMGenAsmWriter.inc" +#undef MachineInstr +#undef ARMAsmPrinter + +void ARMInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } + +void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Modifier && strcmp(Modifier, "dregpair") == 0) { + // FIXME: Breaks e.g. ARM/vmul.ll. + assert(0); + /* + unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 + unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 + O << '{' + << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) + << '}';*/ + } else if (Modifier && strcmp(Modifier, "lane") == 0) { + assert(0); + /* + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); + unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, + &ARM::DPR_VFP2RegClass); + O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; + */ + } else { + O << getRegisterName(Reg); + } + } else if (Op.isImm()) { + assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + O << '#' << Op.getImm(); + } else { + assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI); + } +} + +static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, + const MCAsmInfo *MAI) { + // Break it up into two parts that make up a shifter immediate. + V = ARM_AM::getSOImmVal(V); + assert(V != -1 && "Not a valid so_imm value!"); + + unsigned Imm = ARM_AM::getSOImmValImm(V); + unsigned Rot = ARM_AM::getSOImmValRot(V); + + // Print low-level immediate formation info, per + // A5.1.3: "Data-processing operands - Immediate". + if (Rot) { + O << "#" << Imm << ", " << Rot; + // Pretty printed version. + if (VerboseAsm) + O << ' ' << MAI->getCommentString() + << ' ' << (int)ARM_AM::rotr32(Imm, Rot); + } else { + O << "#" << Imm; + } +} + + +/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit +/// immediate in bits 0-7. +void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + printSOImm(O, MO.getImm(), VerboseAsm, &MAI); +} + +/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov' +/// followed by an 'orr' to materialize. +void ARMInstPrinter::printSOImm2PartOperand(const MCInst *MI, unsigned OpNum) { + // FIXME: REMOVE this method. + abort(); +} + +// so_reg is a 4-operand unit corresponding to register forms of the A5.1 +// "Addressing Mode 1 - Data-processing operands" forms. This includes: +// REG 0 0 - e.g. R5 +// REG REG 0,SH_OPC - e.g. R5, ROR R3 +// REG 0 IMM,SH_OPC - e.g. R5, LSL #3 +void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << getRegisterName(MO1.getReg()); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) + << ' '; + + if (MO2.getReg()) { + O << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } +} + + +void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + + if (!MO2.getReg()) { + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + O << ", #" + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAM2Offset(MO3.getImm()); + O << "]"; + return; + } + + O << ", " + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << getRegisterName(MO2.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; + O << "]"; +} + +void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << '#' << (char)ARM_AM::getAM2Op(MO2.getImm()) << ImmOffs; + return; + } + + O << (char)ARM_AM::getAM2Op(MO2.getImm()) << getRegisterName(MO1.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) + << " #" << ShImm; +} + +void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << '[' << getRegisterName(MO1.getReg()); + + if (MO2.getReg()) { + O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm()) + << getRegisterName(MO2.getReg()) << ']'; + return; + } + + if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) + O << ", #" + << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ImmOffs; + O << ']'; +} + +void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (MO1.getReg()) { + O << (char)ARM_AM::getAM3Op(MO2.getImm()) + << getRegisterName(MO1.getReg()); + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << "#" + << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ImmOffs; +} + + +void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum, + const char *Modifier) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); + if (Modifier && strcmp(Modifier, "submode") == 0) { + if (MO1.getReg() == ARM::SP) { + // FIXME + bool isLDM = (MI->getOpcode() == ARM::LDM || + MI->getOpcode() == ARM::LDM_RET || + MI->getOpcode() == ARM::t2LDM || + MI->getOpcode() == ARM::t2LDM_RET); + O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + } else if (Modifier && strcmp(Modifier, "wide") == 0) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); + if (Mode == ARM_AM::ia) + O << ".w"; + } else { + printOperand(MI, OpNum); + if (ARM_AM::getAM4WBFlag(MO2.getImm())) + O << "!"; + } +} + +void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const char *Modifier) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, OpNum); + return; + } + + if (Modifier && strcmp(Modifier, "submode") == 0) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); + if (MO1.getReg() == ARM::SP) { + bool isFLDM = (MI->getOpcode() == ARM::FLDMD || + MI->getOpcode() == ARM::FLDMS); + O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + return; + } else if (Modifier && strcmp(Modifier, "base") == 0) { + // Used for FSTM{D|S} and LSTM{D|S} operations. + O << getRegisterName(MO1.getReg()); + if (ARM_AM::getAM5WBFlag(MO2.getImm())) + O << "!"; + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + + if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + O << ", #" + << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ImmOffs*4; + } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + // FIXME: No support yet for specifying alignment. + O << '[' << getRegisterName(MO1.getReg()) << ']'; + + if (ARM_AM::getAM6WBFlag(MO3.getImm())) { + if (MO2.getReg() == 0) + O << '!'; + else + O << ", " << getRegisterName(MO2.getReg()); + } +} + +void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, + const char *Modifier) { + assert(0 && "FIXME: Implement printAddrModePCOperand"); +} + +void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + uint32_t v = ~MO.getImm(); + int32_t lsb = CountTrailingZeros_32(v); + int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; + assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); + O << '#' << lsb << ", #" << width; +} + +void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum) { + O << "{"; + // Always skip the first operand, it's the optional (and implicit writeback). + for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) { + if (i != OpNum+1) O << ", "; + O << getRegisterName(MI->getOperand(i).getReg()); + } + O << "}"; +} + +void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + if (CC != ARMCC::AL) + O << ARMCondCodeToString(CC); +} + +void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum){ + if (MI->getOperand(OpNum).getReg()) { + assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && + "Expect ARM CPSR register!"); + O << 's'; + } +} + + + +void ARMInstPrinter::printCPInstOperand(const MCInst *MI, unsigned OpNum, + const char *Modifier) { + // FIXME: remove this. + abort(); +} + +void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum) { + O << MI->getOperand(OpNum).getImm(); +} + + +void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { + // FIXME: remove this. + abort(); +} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h new file mode 100644 index 0000000..4925137 --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -0,0 +1,89 @@ +//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an ARM MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMINSTPRINTER_H +#define ARMINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + class MCOperand; + +class ARMInstPrinter : public MCInstPrinter { + bool VerboseAsm; +public: + ARMInstPrinter(raw_ostream &O, const MCAsmInfo &MAI, bool verboseAsm) + : MCInstPrinter(O, MAI), VerboseAsm(verboseAsm) {} + + virtual void printInst(const MCInst *MI); + + // Autogenerated by tblgen. + void printInstruction(const MCInst *MI); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0); + + void printSOImmOperand(const MCInst *MI, unsigned OpNum); + void printSOImm2PartOperand(const MCInst *MI, unsigned OpNum); + + void printSORegOperand(const MCInst *MI, unsigned OpNum); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum); + void printAddrMode4Operand(const MCInst *MI, unsigned OpNum, + const char *Modifier = 0); + void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const char *Modifier = 0); + void printAddrMode6Operand(const MCInst *MI, unsigned OpNum); + void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, + const char *Modifier = 0); + + void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum); + + void printThumbITMask(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, + unsigned Scale) {} + void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum) {} + + void printT2SOOperand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} + + void printPredicateOperand(const MCInst *MI, unsigned OpNum); + void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); + void printRegisterList(const MCInst *MI, unsigned OpNum); + void printCPInstOperand(const MCInst *MI, unsigned OpNum, + const char *Modifier); + void printJTBlockOperand(const MCInst *MI, unsigned OpNum) {} + void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {} + void printTBAddrMode(const MCInst *MI, unsigned OpNum) {} + void printNoHashImmediate(const MCInst *MI, unsigned OpNum); + + void printPCLabel(const MCInst *MI, unsigned OpNum); + // FIXME: Implement. + void PrintSpecial(const MCInst *MI, const char *Kind) {} +}; + +} + +#endif diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp new file mode 100644 index 0000000..757164e --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp @@ -0,0 +1,166 @@ +//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower ARM MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "ARMMCInstLower.h" +//#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +//#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/SmallString.h" +using namespace llvm; + + +#if 0 +const ARMSubtarget &ARMMCInstLower::getSubtarget() const { + return AsmPrinter.getSubtarget(); +} + +MachineModuleInfoMachO &ARMMCInstLower::getMachOMMI() const { + assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin"); + return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); +} +#endif + +MCSymbol *ARMMCInstLower:: +GetGlobalAddressSymbol(const MachineOperand &MO) const { + const GlobalValue *GV = MO.getGlobal(); + + SmallString<128> Name; + Mang.getNameWithPrefix(Name, GV, false); + + // FIXME: HANDLE PLT references how?? + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *ARMMCInstLower:: +GetExternalSymbolSymbol(const MachineOperand &MO) const { + SmallString<128> Name; + Name += Printer.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + + // FIXME: HANDLE PLT references how?? + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + + + +MCSymbol *ARMMCInstLower:: +GetJumpTableSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI" + << Printer.getFunctionNumber() << '_' << MO.getIndex(); + +#if 0 + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + } +#endif + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *ARMMCInstLower:: +GetConstantPoolIndexSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI" + << Printer.getFunctionNumber() << '_' << MO.getIndex(); + +#if 0 + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + } +#endif + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCOperand ARMMCInstLower:: +LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + +#if 0 + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + } +#endif + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), Ctx), + Ctx); + return MCOperand::CreateExpr(Expr); +} + + +void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + assert(0 && "unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + Printer.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } + +} diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h new file mode 100644 index 0000000..383d30d --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h @@ -0,0 +1,56 @@ +//===-- ARMMCInstLower.h - Lower MachineInstr to MCInst -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_MCINSTLOWER_H +#define ARM_MCINSTLOWER_H + +#include "llvm/Support/Compiler.h" + +namespace llvm { + class AsmPrinter; + class MCAsmInfo; + class MCContext; + class MCInst; + class MCOperand; + class MCSymbol; + class MachineInstr; + class MachineModuleInfoMachO; + class MachineOperand; + class Mangler; + //class ARMSubtarget; + +/// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst. +class VISIBILITY_HIDDEN ARMMCInstLower { + MCContext &Ctx; + Mangler &Mang; + AsmPrinter &Printer; + + //const ARMSubtarget &getSubtarget() const; +public: + ARMMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer) + : Ctx(ctx), Mang(mang), Printer(printer) {} + + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + + //MCSymbol *GetPICBaseSymbol() const; + MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; + MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + +/* +private: + MachineModuleInfoMachO &getMachOMMI() const; + */ +}; + +} + +#endif diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt index a67fc84..4e299f8 100644 --- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt +++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt @@ -2,5 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMARMAsmPrinter ARMAsmPrinter.cpp + ARMInstPrinter.cpp + ARMMCInstLower.cpp ) -add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index a961a57..e7770b2 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -196,14 +196,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack objects are referenced off the frame pointer with negative offsets. See oggenc for an example. -//===---------------------------------------------------------------------===// - -We are reserving R3 as a scratch register under thumb mode. So if it is live in -to the function, we save / restore R3 to / from R12. Until register scavenging -is done, we should save R3 to a high callee saved reg at emitPrologue time -(when hasFP is true or stack size is large) and restore R3 from that register -instead. This allows us to at least get rid of the save to r12 everytime it is -used. //===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 3c896da..6207177 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -32,7 +32,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -394,31 +393,48 @@ rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return 0; } -/// saveScavengerRegister - Save the register so it can be used by the +/// saveScavengerRegister - Spill the register so it can be used by the /// register scavenger. Return true. -bool Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - const TargetRegisterClass *RC, - unsigned Reg) const { +bool +Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { // Thumb1 can't use the emergency spill slot on the stack because // ldr/str immediate offsets must be positive, and if we're referencing // off the frame pointer (if, for example, there are alloca() calls in // the function, the offset will be negative. Use R12 instead since that's // a call clobbered register that we know won't be used in Thumb1 mode. + DebugLoc DL = DebugLoc::getUnknownLoc(); + BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). + addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); + + // The UseMI is where we would like to restore the register. If there's + // interference with R12 before then, however, we'll need to restore it + // before that instead and adjust the UseMI. + bool done = false; + for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { + // If this instruction affects R12, adjust our restore point. + for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = II->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg() || + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + if (MO.getReg() == ARM::R12) { + UseMI = II; + done = true; + break; + } + } + } + // Restore the register from R12 + BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)). + addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); - TII.copyRegToReg(MBB, I, ARM::R12, Reg, ARM::GPRRegisterClass, RC); return true; } -/// restoreScavengerRegister - restore a registers saved by -// saveScavengerRegister(). -void Thumb1RegisterInfo::restoreScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - const TargetRegisterClass *RC, - unsigned Reg) const { - TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass); -} - unsigned Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, int *Value, @@ -828,7 +844,6 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, if (VARegSaveSize) { // Epilogue for vararg functions: pop LR to R3 and branch off it. - // FIXME: Verify this is still ok when R3 is no longer being reserved. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. .addReg(ARM::R3, RegState::Define); diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index bb7a619..570a5bc 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -57,12 +57,9 @@ public: bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC, unsigned Reg) const; - void restoreScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - const TargetRegisterClass *RC, - unsigned Reg) const; unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, int *Value = NULL, RegScavenger *RS = NULL) const; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 98b5cbd..427c0bb 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -107,8 +107,12 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { // Finalize IT mask. ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; - while (MBBI != E && Pos) { + // Branches, including tricky ones like LDM_RET, need to end an IT + // block so check the instruction we just put in the block. + while (MBBI != E && Pos && + (!MI->getDesc().isBranch() && !MI->getDesc().isReturn())) { MachineInstr *NMI = &*MBBI; + MI = NMI; DebugLoc ndl = NMI->getDebugLoc(); unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 6c4c15d..f217e0e 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -32,7 +32,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/Blackfin/Blackfin.td b/lib/Target/Blackfin/Blackfin.td index b904638..cd90962 100644 --- a/lib/Target/Blackfin/Blackfin.td +++ b/lib/Target/Blackfin/Blackfin.td @@ -74,6 +74,7 @@ def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true", include "BlackfinRegisterInfo.td" include "BlackfinCallingConv.td" +include "BlackfinIntrinsics.td" include "BlackfinInstrInfo.td" def BlackfinInstrInfo : InstrInfo {} diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp new file mode 100644 index 0000000..544dc68 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -0,0 +1,53 @@ +//===- BlackfinIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Blackfin implementation of TargetIntrinsicInfo. +// +//===----------------------------------------------------------------------===// + +#include "BlackfinIntrinsicInfo.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> + +using namespace llvm; + +namespace bfinIntrinsic { + + enum ID { + last_non_bfin_intrinsic = Intrinsic::num_intrinsics-1, +#define GET_INTRINSIC_ENUM_VALUES +#include "BlackfinGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , num_bfin_intrinsics + }; + +} + +const char *BlackfinIntrinsicInfo::getName(unsigned IntrID) const { + static const char *const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "BlackfinGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + if (IntrID < Intrinsic::num_intrinsics) + return 0; + assert(IntrID < bfinIntrinsic::num_bfin_intrinsics && "Invalid intrinsic ID"); + + return names[IntrID - Intrinsic::num_intrinsics]; +} + +unsigned +BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const { +#define GET_FUNCTION_RECOGNIZER +#include "BlackfinGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + return 0; +} diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/lib/Target/Blackfin/BlackfinIntrinsicInfo.h new file mode 100644 index 0000000..3b59a60 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.h @@ -0,0 +1,28 @@ +//===- BlackfinIntrinsicInfo.h - Blackfin Intrinsic Information -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Blackfin implementation of TargetIntrinsicInfo. +// +//===----------------------------------------------------------------------===// +#ifndef BLACKFININTRINSICS_H +#define BLACKFININTRINSICS_H + +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { + + class BlackfinIntrinsicInfo : public TargetIntrinsicInfo { + public: + const char *getName(unsigned IntrID) const; + unsigned lookupName(const char *Name, unsigned Len) const; + }; + +} + +#endif diff --git a/lib/Target/Blackfin/BlackfinIntrinsics.td b/lib/Target/Blackfin/BlackfinIntrinsics.td new file mode 100644 index 0000000..bf02cfe --- /dev/null +++ b/lib/Target/Blackfin/BlackfinIntrinsics.td @@ -0,0 +1,34 @@ +//===- BlackfinIntrinsics.td - Defines Blackfin intrinsics -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the blackfin-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "bfin", isTarget = 1 in { + +//===----------------------------------------------------------------------===// +// Core synchronisation etc. +// +// These intrinsics have sideeffects. Each represent a single instruction, but +// workarounds are sometimes required depending on the cpu. + +// Execute csync instruction with workarounds +def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">, + Intrinsic<[llvm_void_ty]>; + +// Execute ssync instruction with workarounds +def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">, + Intrinsic<[llvm_void_ty]>; + +// Execute idle instruction with workarounds +def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">, + Intrinsic<[llvm_void_ty]>; + +} diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index 73ed314..a14052b 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -20,6 +20,7 @@ #include "BlackfinInstrInfo.h" #include "BlackfinSubtarget.h" #include "BlackfinISelLowering.h" +#include "BlackfinIntrinsicInfo.h" namespace llvm { @@ -29,6 +30,7 @@ namespace llvm { BlackfinTargetLowering TLInfo; BlackfinInstrInfo InstrInfo; TargetFrameInfo FrameInfo; + BlackfinIntrinsicInfo IntrinsicInfo; public: BlackfinTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -47,6 +49,9 @@ namespace llvm { virtual const TargetData *getTargetData() const { return &DataLayout; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + const TargetIntrinsicInfo *getIntrinsicInfo() const { + return &IntrinsicInfo; + } }; } // end namespace llvm diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt index 6c3b244..deb005d 100644 --- a/lib/Target/Blackfin/CMakeLists.txt +++ b/lib/Target/Blackfin/CMakeLists.txt @@ -9,9 +9,11 @@ tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer) tablegen(BlackfinGenDAGISel.inc -gen-dag-isel) tablegen(BlackfinGenSubtarget.inc -gen-subtarget) tablegen(BlackfinGenCallingConv.inc -gen-callingconv) +tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic) add_llvm_target(BlackfinCodeGen BlackfinInstrInfo.cpp + BlackfinIntrinsicInfo.cpp BlackfinISelDAGToDAG.cpp BlackfinISelLowering.cpp BlackfinMCAsmInfo.cpp diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile index c0c1bce..c68760b 100644 --- a/lib/Target/Blackfin/Makefile +++ b/lib/Target/Blackfin/Makefile @@ -15,7 +15,7 @@ BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \ BlackfinGenRegisterInfo.inc BlackfinGenInstrNames.inc \ BlackfinGenInstrInfo.inc BlackfinGenAsmWriter.inc \ BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \ - BlackfinGenCallingConv.inc + BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc DIRS = AsmPrinter TargetInfo diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index fe63edf..cbf769b 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -302,7 +302,6 @@ namespace { void visitInlineAsm(CallInst &I); bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee); - void visitMallocInst(MallocInst &I); void visitAllocaInst(AllocaInst &I); void visitFreeInst (FreeInst &I); void visitLoadInst (LoadInst &I); @@ -3405,10 +3404,6 @@ void CWriter::visitInlineAsm(CallInst &CI) { Out << ")"; } -void CWriter::visitMallocInst(MallocInst &I) { - llvm_unreachable("lowerallocations pass didn't work!"); -} - void CWriter::visitAllocaInst(AllocaInst &I) { Out << '('; printType(Out, I.getType()); @@ -3690,7 +3685,7 @@ bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, if (FileType != TargetMachine::AssemblyFile) return true; PM.add(createGCLoweringPass()); - PM.add(createLowerAllocationsPass(true)); + PM.add(createLowerAllocationsPass()); PM.add(createLowerInvokePass()); PM.add(createCFGSimplificationPass()); // clean up after lower invoke. PM.add(new CBackendNameAllUsedStructsAndMergeFunctions()); diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 14ad451..45c2a7b 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1258,20 +1258,6 @@ namespace { Out << "\");"; break; } - case Instruction::Malloc: { - const MallocInst* mallocI = cast<MallocInst>(I); - Out << "MallocInst* " << iName << " = new MallocInst(" - << getCppName(mallocI->getAllocatedType()) << ", "; - if (mallocI->isArrayAllocation()) - Out << opNames[0] << ", " ; - Out << "\""; - printEscapedString(mallocI->getName()); - Out << "\", " << bbname << ");"; - if (mallocI->getAlignment()) - nl(Out) << iName << "->setAlignment(" - << mallocI->getAlignment() << ");"; - break; - } case Instruction::Free: { Out << "FreeInst* " << iName << " = new FreeInst(" << getCppName(I->getOperand(0)) << ", " << bbname << ");"; diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 26d637b..cf08a97 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -1191,9 +1191,6 @@ void MSILWriter::printInstruction(const Instruction* Inst) { case Instruction::Alloca: printAllocaInstruction(cast<AllocaInst>(Inst)); break; - case Instruction::Malloc: - llvm_unreachable("LowerAllocationsPass used"); - break; case Instruction::Free: llvm_unreachable("LowerAllocationsPass used"); break; @@ -1702,7 +1699,7 @@ bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, if (FileType != TargetMachine::AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); PM.add(createGCLoweringPass()); - PM.add(createLowerAllocationsPass(true)); + PM.add(createLowerAllocationsPass()); // FIXME: Handle switch trougth native IL instruction "switch" PM.add(createLowerSwitchPass()); PM.add(createCFGSimplificationPass()); diff --git a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt index 6e66887..f1eb885 100644 --- a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt +++ b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt @@ -1,6 +1,8 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMMSP430AsmPrinter + MSP430InstPrinter.cpp MSP430AsmPrinter.cpp + MSP430MCInstLower.cpp ) add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen) diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp index 852019f..ace358e 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -15,7 +15,9 @@ #define DEBUG_TYPE "asm-printer" #include "MSP430.h" #include "MSP430InstrInfo.h" +#include "MSP430InstPrinter.h" #include "MSP430MCAsmInfo.h" +#include "MSP430MCInstLower.h" #include "MSP430TargetMachine.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -26,12 +28,14 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" @@ -41,6 +45,10 @@ using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); +static cl::opt<bool> +EnableMCInst("enable-msp430-mcinst-printer", cl::Hidden, + cl::desc("enable experimental mcinst gunk in the msp430 backend")); + namespace { class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter { public: @@ -52,8 +60,14 @@ namespace { return "MSP430 Assembly Printer"; } + void printMCInst(const MCInst *MI) { + MSP430InstPrinter(O, *MAI).printInstruction(MI); + } void printOperand(const MachineInstr *MI, int OpNum, const char* Modifier = 0); + void printPCRelImmOperand(const MachineInstr *MI, int OpNum) { + printOperand(MI, OpNum); + } void printSrcMemOperand(const MachineInstr *MI, int OpNum, const char* Modifier = 0); void printCCOperand(const MachineInstr *MI, int OpNum); @@ -67,6 +81,7 @@ namespace { bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); + void printInstructionThroughMCStreamer(const MachineInstr *MI); void emitFunctionHeader(const MachineFunction &MF); bool runOnMachineFunction(MachineFunction &F); @@ -148,7 +163,11 @@ void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) { processDebugLoc(MI, true); // Call the autogenerated instruction printer routines. - printInstruction(MI); + if (EnableMCInst) { + printInstructionThroughMCStreamer(MI); + } else { + printInstruction(MI); + } if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); @@ -231,22 +250,22 @@ void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) { default: llvm_unreachable("Unsupported CC code"); break; - case MSP430::COND_E: + case MSP430CC::COND_E: O << "eq"; break; - case MSP430::COND_NE: + case MSP430CC::COND_NE: O << "ne"; break; - case MSP430::COND_HS: + case MSP430CC::COND_HS: O << "hs"; break; - case MSP430::COND_LO: + case MSP430CC::COND_LO: O << "lo"; break; - case MSP430::COND_GE: + case MSP430CC::COND_GE: O << "ge"; break; - case MSP430::COND_L: + case MSP430CC::COND_L: O << 'l'; break; } @@ -275,6 +294,36 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +//===----------------------------------------------------------------------===// +void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) +{ + + MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this); + + switch (MI->getOpcode()) { + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + case TargetInstrInfo::GC_LABEL: + printLabel(MI); + return; + case TargetInstrInfo::KILL: + return; + case TargetInstrInfo::INLINEASM: + O << '\t'; + printInlineAsm(MI); + return; + case TargetInstrInfo::IMPLICIT_DEF: + printImplicitDef(MI); + return; + default: break; + } + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + printMCInst(&TmpInst); +} + // Force static initialization. extern "C" void LLVMInitializeMSP430AsmPrinter() { RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp new file mode 100644 index 0000000..a3ecc67 --- /dev/null +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp @@ -0,0 +1,116 @@ +//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an MSP430 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "MSP430.h" +#include "MSP430InstrInfo.h" +#include "MSP430InstPrinter.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + + +// Include the auto-generated portion of the assembly writer. +#define MachineInstr MCInst +#define MSP430AsmPrinter MSP430InstPrinter // FIXME: REMOVE. +#define NO_ASM_WRITER_BOILERPLATE +#include "MSP430GenAsmWriter.inc" +#undef MachineInstr +#undef MSP430AsmPrinter + +void MSP430InstPrinter::printInst(const MCInst *MI) { + printInstruction(MI); +} + +void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) + O << Op.getImm(); + else { + assert(Op.isExpr() && "unknown pcrel immediate operand"); + Op.getExpr()->print(O, &MAI); + } +} + +void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier) { + assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + O << getRegisterName(Op.getReg()); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << '#'; + Op.getExpr()->print(O, &MAI); + } +} + +void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier) { + const MCOperand &Base = MI->getOperand(OpNo); + const MCOperand &Disp = MI->getOperand(OpNo+1); + + // FIXME: move global to displacement field! + if (Base.isExpr()) { + O << '&'; + Base.getExpr()->print(O, &MAI); + } else if (Disp.isImm() && !Base.isReg()) + printOperand(MI, OpNo); + else if (Base.isReg()) { + if (Disp.getImm()) { + O << Disp.getImm() << '('; + printOperand(MI, OpNo); + O << ')'; + } else { + O << '@'; + printOperand(MI, OpNo); + } + } else { + Base.dump(); + Disp.dump(); + llvm_unreachable("Unsupported memory operand"); + } +} + +void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo) { + unsigned CC = MI->getOperand(OpNo).getImm(); + + switch (CC) { + default: + llvm_unreachable("Unsupported CC code"); + break; + case MSP430CC::COND_E: + O << "eq"; + break; + case MSP430CC::COND_NE: + O << "ne"; + break; + case MSP430CC::COND_HS: + O << "hs"; + break; + case MSP430CC::COND_LO: + O << "lo"; + break; + case MSP430CC::COND_GE: + O << "ge"; + break; + case MSP430CC::COND_L: + O << 'l'; + break; + } +} diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h new file mode 100644 index 0000000..2fac800 --- /dev/null +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h @@ -0,0 +1,46 @@ +//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a MSP430 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef MSP430INSTPRINTER_H +#define MSP430INSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm +{ + + class MCOperand; + + class MSP430InstPrinter : public MCInstPrinter { + public: + MSP430InstPrinter(raw_ostream &O, const MCAsmInfo &MAI) : + MCInstPrinter(O, MAI){ + } + + virtual void printInst(const MCInst *MI); + + // Autogenerated by tblgen. + void printInstruction(const MCInst *MI); + static const char *getRegisterName(unsigned RegNo); + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0); + void printPCRelImmOperand(const MCInst *MI, unsigned OpNo); + void printSrcMemOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0); + void printCCOperand(const MCInst *MI, unsigned OpNo); + + }; +} + +#endif diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp new file mode 100644 index 0000000..f505b23 --- /dev/null +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp @@ -0,0 +1,147 @@ +//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower MSP430 MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "MSP430MCInstLower.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/SmallString.h" +using namespace llvm; + +MCSymbol *MSP430MCInstLower:: +GetGlobalAddressSymbol(const MachineOperand &MO) const { + const GlobalValue *GV = MO.getGlobal(); + + SmallString<128> Name; + Mang.getNameWithPrefix(Name, GV, false); + + switch (MO.getTargetFlags()) { + default: llvm_unreachable(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *MSP430MCInstLower:: +GetExternalSymbolSymbol(const MachineOperand &MO) const { + SmallString<128> Name; + Name += Printer.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *MSP430MCInstLower:: +GetJumpTableSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI" + << Printer.getFunctionNumber() << '_' + << MO.getIndex(); + + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + case 0: break; + } + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *MSP430MCInstLower:: +GetConstantPoolIndexSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI" + << Printer.getFunctionNumber() << '_' + << MO.getIndex(); + + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + case 0: break; + } + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCOperand MSP430MCInstLower:: +LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + case 0: break; + } + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), Ctx), + Ctx); + return MCOperand::CreateExpr(Expr); +} + +void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + assert(0 && "unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + Printer.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h new file mode 100644 index 0000000..a2b99ae --- /dev/null +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h @@ -0,0 +1,49 @@ +//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef MSP430_MCINSTLOWER_H +#define MSP430_MCINSTLOWER_H + +#include "llvm/Support/Compiler.h" + +namespace llvm { + class AsmPrinter; + class MCAsmInfo; + class MCContext; + class MCInst; + class MCOperand; + class MCSymbol; + class MachineInstr; + class MachineModuleInfoMachO; + class MachineOperand; + class Mangler; + + /// MSP430MCInstLower - This class is used to lower an MachineInstr + /// into an MCInst. +class VISIBILITY_HIDDEN MSP430MCInstLower { + MCContext &Ctx; + Mangler &Mang; + + AsmPrinter &Printer; +public: + MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer) + : Ctx(ctx), Mang(mang), Printer(printer) {} + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + + MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; + MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; +}; + +} + +#endif diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h index d9f5f86..1ff178d 100644 --- a/lib/Target/MSP430/MSP430.h +++ b/lib/Target/MSP430/MSP430.h @@ -17,6 +17,20 @@ #include "llvm/Target/TargetMachine.h" +namespace MSP430CC { + // MSP430 specific condition code. + enum CondCodes { + COND_E = 0, // aka COND_Z + COND_NE = 1, // aka COND_NZ + COND_HS = 2, // aka COND_C + COND_LO = 3, // aka COND_NC + COND_GE = 4, + COND_L = 5, + + COND_INVALID = -1 + }; +} + namespace llvm { class MSP430TargetMachine; class FunctionPass; diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4195a88..b7d9282 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -34,6 +35,14 @@ using namespace llvm; +#ifndef NDEBUG +static cl::opt<bool> +ViewRMWDAGs("view-msp430-rmw-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags after RMW preprocess")); +#else +static const bool ViewRMWDAGs = false; +#endif + STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); /// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine @@ -56,6 +65,9 @@ namespace { return "MSP430 DAG->DAG Pattern Instruction Selection"; } + bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, + SDNode *Root) const; + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); @@ -64,6 +76,7 @@ namespace { #include "MSP430GenDAGISel.inc" private: + DenseMap<SDNode*, SDNode*> RMWStores; void PreprocessForRMW(); SDNode *Select(SDValue Op); bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp); @@ -130,7 +143,6 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr, return true; } - bool MSP430DAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { @@ -148,31 +160,54 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } +bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; + + /// RMW preprocessing creates the following code: + /// [Load1] + /// ^ ^ + /// / | + /// / | + /// [Load2] | + /// ^ ^ | + /// | | | + /// | \-| + /// | | + /// | [Op] + /// | ^ + /// | | + /// \ / + /// \ / + /// [Store] + /// + /// The path Store => Load2 => Load1 is via chain. Note that in general it is + /// not allowed to fold Load1 into Op (and Store) since it will creates a + /// cycle. However, this is perfectly legal for the loads moved below the + /// TokenFactor by PreprocessForRMW. Query the map Store => Load1 (created + /// during preprocessing) to determine whether it's legal to introduce such + /// "cycle" for a moment. + DenseMap<SDNode*, SDNode*>::iterator I = RMWStores.find(Root); + if (I != RMWStores.end() && I->second == N) + return true; + + // Proceed to 'generic' cycle finder code + return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); +} + + /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand /// and move load below the TokenFactor. Replace store's chain operand with /// load's chain result. -/// Shamelessly stolen from X86. static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, SDValue Store, SDValue TF) { SmallVector<SDValue, 4> Ops; - bool isRMW = false; - SDValue TF0, TF1, NewTF; for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) { - TF0 = Load.getOperand(0); - Ops.push_back(TF0); - } else { - TF1 = TF.getOperand(i); - Ops.push_back(TF1); - if (LoadSDNode* LD = dyn_cast<LoadSDNode>(TF1)) - isRMW = !LD->isVolatile(); - } - - if (isRMW && TF1.getOperand(0).getNode() == TF0.getNode()) - NewTF = TF0; - else - NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - + if (Load.getNode() == TF.getOperand(i).getNode()) + Ops.push_back(Load.getOperand(0)); + else + Ops.push_back(TF.getOperand(i)); + SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, Load.getOperand(1), Load.getOperand(2)); @@ -180,12 +215,43 @@ static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, Store.getOperand(2), Store.getOperand(3)); } -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The chain -/// produced by the load must only be used by the store's chain operand, -/// otherwise this may produce a cycle in the DAG. -/// Shamelessly stolen from X86. FIXME: Should we make this function common? -static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { +/// MoveBelowTokenFactor2 - Replace TokenFactor operand with load's chain operand +/// and move load below the TokenFactor. Replace store's chain operand with +/// load's chain result. This a version which sinks two loads below token factor. +/// Look into PreprocessForRMW comments for explanation of transform. +static void MoveBelowTokenFactor2(SelectionDAG *CurDAG, + SDValue Load1, SDValue Load2, + SDValue Store, SDValue TF) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) { + SDNode* N = TF.getOperand(i).getNode(); + if (Load2.getNode() == N) + Ops.push_back(Load2.getOperand(0)); + else if (Load1.getNode() != N) + Ops.push_back(TF.getOperand(i)); + } + + SDValue NewTF = SDValue(CurDAG->MorphNodeTo(TF.getNode(), + TF.getOpcode(), + TF.getNode()->getVTList(), + &Ops[0], Ops.size()), TF.getResNo()); + SDValue NewLoad2 = CurDAG->UpdateNodeOperands(Load2, NewTF, + Load2.getOperand(1), + Load2.getOperand(2)); + + SDValue NewLoad1 = CurDAG->UpdateNodeOperands(Load1, NewLoad2.getValue(1), + Load1.getOperand(1), + Load1.getOperand(2)); + + CurDAG->UpdateNodeOperands(Store, + NewLoad1.getValue(1), + Store.getOperand(1), + Store.getOperand(2), Store.getOperand(3)); +} + +/// isAllowedToSink - return true if N a load which can be moved below token +/// factor. Basically, the load should be non-volatile and has single use. +static bool isLoadAllowedToSink(SDValue N, SDValue Chain) { if (N.getOpcode() == ISD::BIT_CONVERT) N = N.getOperand(0); @@ -199,10 +265,19 @@ static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) return false; - if (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - N.getOperand(1) == Address && - LD->isOperandOf(Chain.getNode())) { + return (N.hasOneUse() && + LD->hasNUsesOfValue(1, 1) && + LD->isOperandOf(Chain.getNode())); +} + + +/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. +/// The chain produced by the load must only be used by the store's chain +/// operand, otherwise this may produce a cycle in the DAG. +static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, + SDValue &Load) { + if (isLoadAllowedToSink(N, Chain) && + N.getOperand(1) == Address) { Load = N; return true; } @@ -210,57 +285,140 @@ static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, } /// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// Shamelessly stolen from X86. +/// This is only run if not in -O0 mode. +/// This allows the instruction selector to pick more read-modify-write +/// instructions. This is a common case: +/// +/// [Load chain] +/// ^ +/// | +/// [Load] +/// ^ ^ +/// | | +/// / \- +/// / | +/// [TokenFactor] [Op] +/// ^ ^ +/// | | +/// \ / +/// \ / +/// [Store] +/// +/// The fact the store's chain operand != load's chain will prevent the +/// (store (op (load))) instruction from being selected. We can transform it to: +/// +/// [Load chain] +/// ^ +/// | +/// [TokenFactor] +/// ^ +/// | +/// [Load] +/// ^ ^ +/// | | +/// | \- +/// | | +/// | [Op] +/// | ^ +/// | | +/// \ / +/// \ / +/// [Store] +/// +/// We also recognize the case where second operand of Op is load as well and +/// move it below token factor as well creating DAG as follows: +/// +/// [Load chain] +/// ^ +/// | +/// [TokenFactor] +/// ^ +/// | +/// [Load1] +/// ^ ^ +/// / | +/// / | +/// [Load2] | +/// ^ ^ | +/// | | | +/// | \-| +/// | | +/// | [Op] +/// | ^ +/// | | +/// \ / +/// \ / +/// [Store] +/// +/// This allows selection of mem-mem instructions. Yay! + void MSP430DAGToDAGISel::PreprocessForRMW() { for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ++I) { if (!ISD::isNON_TRUNCStore(I)) continue; - SDValue Chain = I->getOperand(0); + if (Chain.getNode()->getOpcode() != ISD::TokenFactor) continue; - SDValue N1 = I->getOperand(1); // Value to store - SDValue N2 = I->getOperand(2); // Address of store - - if (!N1.hasOneUse()) + SDValue N1 = I->getOperand(1); + SDValue N2 = I->getOperand(2); + if ((N1.getValueType().isFloatingPoint() && + !N1.getValueType().isVector()) || + !N1.hasOneUse()) continue; - bool RModW = false; - SDValue Load; + unsigned RModW = 0; + SDValue Load1, Load2; unsigned Opcode = N1.getNode()->getOpcode(); switch (Opcode) { - case ISD::ADD: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - RModW = isRMWLoad(N10, Chain, N2, Load); - - if (!RModW && isRMWLoad(N11, Chain, N2, Load)) { - // Swap the operands, making the RMW load the first operand seems - // to help selection and prevent token chain loops. - N1 = CurDAG->UpdateNodeOperands(N1, N11, N10); - RModW = true; - } - break; + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADDC: + case ISD::ADDE: { + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + if (isRMWLoad(N10, Chain, N2, Load1)) { + if (isLoadAllowedToSink(N11, Chain)) { + Load2 = N11; + RModW = 2; + } else + RModW = 1; + } else if (isRMWLoad(N11, Chain, N2, Load1)) { + if (isLoadAllowedToSink(N10, Chain)) { + Load2 = N10; + RModW = 2; + } else + RModW = 1; } - case ISD::SUB: - case ISD::SUBC: - case ISD::SUBE: { - SDValue N10 = N1.getOperand(0); - RModW = isRMWLoad(N10, Chain, N2, Load); - break; + break; + } + case ISD::SUB: + case ISD::SUBC: + case ISD::SUBE: { + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + if (isRMWLoad(N10, Chain, N2, Load1)) { + if (isLoadAllowedToSink(N11, Chain)) { + Load2 = N11; + RModW = 2; + } else + RModW = 1; } + break; + } } - if (RModW) { - MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); - ++NumLoadMoved; + NumLoadMoved += RModW; + if (RModW == 1) + MoveBelowTokenFactor(CurDAG, Load1, SDValue(I, 0), Chain); + else if (RModW == 2) { + MoveBelowTokenFactor2(CurDAG, Load1, Load2, SDValue(I, 0), Chain); + SDNode* Store = I; + RMWStores[Store] = Load2.getNode(); } } } @@ -268,8 +426,15 @@ void MSP430DAGToDAGISel::PreprocessForRMW() { /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void MSP430DAGToDAGISel::InstructionSelect() { + std::string BlockName; + if (ViewRMWDAGs) + BlockName = MF->getFunction()->getNameStr() + ":" + + BB->getBasicBlock()->getNameStr(); + PreprocessForRMW(); + if (ViewRMWDAGs) CurDAG->viewGraph("RMW preprocessed:" + BlockName); + DEBUG(errs() << "Selection DAG after RMW preprocessing:\n"); DEBUG(CurDAG->dump()); @@ -282,6 +447,7 @@ void MSP430DAGToDAGISel::InstructionSelect() { DEBUG(errs() << "===== Instruction selection ends:\n"); CurDAG->RemoveDeadNodes(); + RMWStores.clear(); } SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index b56f069..34e6d2c 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -567,44 +567,45 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; } -static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, unsigned &TargetCC, +static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { // FIXME: Handle bittests someday assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet"); // FIXME: Handle jump negative someday - TargetCC = MSP430::COND_INVALID; + MSP430CC::CondCodes TCC = MSP430CC::COND_INVALID; switch (CC) { default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: - TargetCC = MSP430::COND_E; // aka COND_Z + TCC = MSP430CC::COND_E; // aka COND_Z break; case ISD::SETNE: - TargetCC = MSP430::COND_NE; // aka COND_NZ + TCC = MSP430CC::COND_NE; // aka COND_NZ break; case ISD::SETULE: std::swap(LHS, RHS); // FALLTHROUGH case ISD::SETUGE: - TargetCC = MSP430::COND_HS; // aka COND_C + TCC = MSP430CC::COND_HS; // aka COND_C break; case ISD::SETUGT: std::swap(LHS, RHS); // FALLTHROUGH case ISD::SETULT: - TargetCC = MSP430::COND_LO; // aka COND_NC + TCC = MSP430CC::COND_LO; // aka COND_NC break; case ISD::SETLE: std::swap(LHS, RHS); // FALLTHROUGH case ISD::SETGE: - TargetCC = MSP430::COND_GE; + TCC = MSP430CC::COND_GE; break; case ISD::SETGT: std::swap(LHS, RHS); // FALLTHROUGH case ISD::SETLT: - TargetCC = MSP430::COND_L; + TCC = MSP430CC::COND_L; break; } + TargetCC = DAG.getConstant(TCC, MVT::i8); return DAG.getNode(MSP430ISD::CMP, dl, MVT::Flag, LHS, RHS); } @@ -617,13 +618,11 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { SDValue Dest = Op.getOperand(4); DebugLoc dl = Op.getDebugLoc(); - unsigned TargetCC = MSP430::COND_INVALID; + SDValue TargetCC; SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG); return DAG.getNode(MSP430ISD::BR_CC, dl, Op.getValueType(), - Chain, - Dest, DAG.getConstant(TargetCC, MVT::i8), - Flag); + Chain, Dest, TargetCC, Flag); } SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { @@ -634,14 +633,14 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); DebugLoc dl = Op.getDebugLoc(); - unsigned TargetCC = MSP430::COND_INVALID; + SDValue TargetCC; SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); SmallVector<SDValue, 4> Ops; Ops.push_back(TrueV); Ops.push_back(FalseV); - Ops.push_back(DAG.getConstant(TargetCC, MVT::i8)); + Ops.push_back(TargetCC); Ops.push_back(Flag); return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size()); diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 37fbb6d..a6d9638 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -151,6 +151,163 @@ MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + + while (I != MBB.begin()) { + --I; + if (I->getOpcode() != MSP430::JMP && + I->getOpcode() != MSP430::JCC) + break; + // Remove the branch. + I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + + return Count; +} + +bool MSP430InstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 1 && "Invalid Xbranch condition!"); + + MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm()); + + switch (CC) { + default: + assert(0 && "Invalid branch condition!"); + break; + case MSP430CC::COND_E: + CC = MSP430CC::COND_NE; + break; + case MSP430CC::COND_NE: + CC = MSP430CC::COND_E; + break; + case MSP430CC::COND_L: + CC = MSP430CC::COND_GE; + break; + case MSP430CC::COND_GE: + CC = MSP430CC::COND_L; + break; + case MSP430CC::COND_HS: + CC = MSP430CC::COND_LO; + break; + case MSP430CC::COND_LO: + CC = MSP430CC::COND_HS; + break; + } + + Cond[0].setImm(CC); + return false; +} + +bool MSP430InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{ + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case MSP430::RET: // Return. + case MSP430::JMP: // Uncond branch. + return true; + default: return false; + } +} + +bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isTerminator()) return false; + + // Conditional branch is a special case. + if (TID.isBranch() && !TID.isBarrier()) + return true; + if (!TID.isPredicable()) + return true; + return !isPredicated(MI); +} + +bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // Start from the bottom of the block and work up, examining the + // terminator instructions. + MachineBasicBlock::iterator I = MBB.end(); + while (I != MBB.begin()) { + --I; + // Working from the bottom, when we see a non-terminator + // instruction, we're done. + if (!isUnpredicatedTerminator(I)) + break; + + // A terminator that isn't a branch can't easily be handled + // by this analysis. + if (!I->getDesc().isBranch()) + return true; + + // Handle unconditional branches. + if (I->getOpcode() == MSP430::JMP) { + if (!AllowModify) { + TBB = I->getOperand(0).getMBB(); + continue; + } + + // If the block has any instructions after a JMP, delete them. + while (next(I) != MBB.end()) + next(I)->eraseFromParent(); + Cond.clear(); + FBB = 0; + + // Delete the JMP if it's equivalent to a fall-through. + if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { + TBB = 0; + I->eraseFromParent(); + I = MBB.end(); + continue; + } + + // TBB is used to indicate the unconditinal destination. + TBB = I->getOperand(0).getMBB(); + continue; + } + + // Handle conditional branches. + assert(I->getOpcode() == MSP430::JCC && "Invalid conditional branch"); + MSP430CC::CondCodes BranchCode = + static_cast<MSP430CC::CondCodes>(I->getOperand(1).getImm()); + if (BranchCode == MSP430CC::COND_INVALID) + return true; // Can't handle weird stuff. + + // Working from the bottom, handle the first conditional branch. + if (Cond.empty()) { + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(BranchCode)); + continue; + } + + // Handle subsequent conditional branches. Only handle the case where all + // conditional branches branch to the same destination. + assert(Cond.size() == 1); + assert(TBB); + + // Only handle the case where all conditional branches branch to + // the same destination. + if (TBB != I->getOperand(0).getMBB()) + return true; + + MSP430CC::CondCodes OldBranchCode = (MSP430CC::CondCodes)Cond[0].getImm(); + // If the conditions are the same, we can leave them alone. + if (OldBranchCode == BranchCode) + continue; + + return true; + } + + return false; +} + unsigned MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -172,7 +329,13 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Conditional branch. unsigned Count = 0; - llvm_unreachable("Implement conditional branches!"); + BuildMI(&MBB, dl, get(MSP430::JCC)).addMBB(TBB).addImm(Cond[0].getImm()); + ++Count; + if (FBB) { + // Two-way Conditional branch. Insert the second branch. + BuildMI(&MBB, dl, get(MSP430::JMP)).addMBB(FBB); + ++Count; + } return Count; } diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index e07aaca..35e35db 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -21,20 +21,6 @@ namespace llvm { class MSP430TargetMachine; -namespace MSP430 { - // MSP430 specific condition code. - enum CondCode { - COND_E = 0, // aka COND_Z - COND_NE = 1, // aka COND_NZ - COND_HS = 2, // aka COND_C - COND_LO = 3, // aka COND_NC - COND_GE = 4, - COND_L = 5, - - COND_INVALID - }; -} - class MSP430InstrInfo : public TargetInstrInfoImpl { const MSP430RegisterInfo RI; MSP430TargetMachine &TM; @@ -73,9 +59,19 @@ public: MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + // Branch folding goodness + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + bool isUnpredicatedTerminator(const MachineInstr *MI) const; + bool AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const; }; diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index f7e0d2b..e202175 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -71,7 +71,9 @@ def memdst : Operand<i16> { } // Branch targets have OtherVT type. -def brtarget : Operand<OtherVT>; +def brtarget : Operand<OtherVT> { + let PrintMethod = "printPCRelImmOperand"; +} // Operand for printing out a condition code. def cc : Operand<i8> { diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 5e21f8e..da54507 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -32,7 +32,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, LLVMTargetMachine(T, TT), Subtarget(TT, FS), // FIXME: Check TargetData string. - DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"), + DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32"), InstrInfo(*this), TLInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index 3f415af..ea0f494 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -12,8 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "PIC16ABINames.h" #include "PIC16AsmPrinter.h" -#include "MCSectionPIC16.h" +#include "PIC16Section.h" #include "PIC16MCAsmInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -39,7 +40,7 @@ PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, : AsmPrinter(O, TM, T, V), DbgInfo(O, T) { PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering()); PMAI = static_cast<const PIC16MCAsmInfo*>(T); - PTOF = (PIC16TargetObjectFile*)&PTLI->getObjFileLowering(); + PTOF = (PIC16TargetObjectFile *)&PTLI->getObjFileLowering(); } bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { @@ -52,6 +53,45 @@ bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { return true; } +static int getFunctionColor(const Function *F) { + if (F->hasSection()) { + std::string Sectn = F->getSection(); + std::string StrToFind = "Overlay="; + std::string::size_type Pos = Sectn.find(StrToFind); + + // Retreive the color number if the key is found. + if (Pos != std::string::npos) { + Pos += StrToFind.length(); + std::string Color = ""; + char c = Sectn.at(Pos); + // A Color can only consist of digits. + while (c >= '0' && c<= '9') { + Color.append(1,c); + Pos++; + if (Pos >= Sectn.length()) + break; + c = Sectn.at(Pos); + } + return atoi(Color.c_str()); + } + } + + // Color was not set for function, so return -1. + return -1; +} + +// Color the Auto section of the given function. +void PIC16AsmPrinter::ColorAutoSection(const Function *F) { + std::string SectionName = PAN::getAutosSectionName(CurrentFnName); + PIC16Section* Section = PTOF->findPIC16Section(SectionName); + if (Section != NULL) { + int Color = getFunctionColor(F); + if (Color >= 0) + Section->setColor(Color); + } +} + + /// runOnMachineFunction - This emits the frame section, autos section and /// assembly for each instruction. Also takes care of function begin debug /// directive and file begin debug directive (if required) for the function. @@ -67,17 +107,18 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { const Function *F = MF.getFunction(); CurrentFnName = Mang->getMangledName(F); + // Put the color information from function to its auto section. + ColorAutoSection(F); + // Emit the function frame (args and temps). EmitFunctionFrame(MF); DbgInfo.BeginFunction(MF); - // Emit the autos section of function. - EmitAutos(CurrentFnName); - // Now emit the instructions of function in its code section. - const MCSection *fCodeSection = - getObjFileLowering().getSectionForFunction(CurrentFnName); + const MCSection *fCodeSection + = getObjFileLowering().SectionForCode(CurrentFnName); + // Start the Code Section. O << "\n"; OutStreamer.SwitchSection(fCodeSection); @@ -229,12 +270,26 @@ bool PIC16AsmPrinter::doInitialization(Module &M) { // Set the section names for all globals. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { + I != E; ++I) { + + // Record External Var Decls. + if (I->isDeclaration()) { + ExternalVarDecls.push_back(I); + continue; + } + + // Record Exteranl Var Defs. + if (I->hasExternalLinkage() || I->hasCommonLinkage()) { + ExternalVarDefs.push_back(I); + } + + // Sectionify actual data. + if (!I->hasAvailableExternallyLinkage()) { const MCSection *S = getObjFileLowering().SectionForGlobal(I, Mang, TM); - I->setSection(((const MCSectionPIC16*)S)->getName()); + I->setSection(((const PIC16Section *)S)->getName()); } + } DbgInfo.BeginModule(M); EmitFunctionDecls(M); @@ -243,6 +298,7 @@ bool PIC16AsmPrinter::doInitialization(Module &M) { EmitIData(M); EmitUData(M); EmitRomData(M); + EmitUserSections(M); return Result; } @@ -287,7 +343,7 @@ void PIC16AsmPrinter::EmitFunctionDecls(Module &M) { // Emit variables imported from other Modules. void PIC16AsmPrinter::EmitUndefinedVars(Module &M) { - std::vector<const GlobalVariable*> Items = PTOF->ExternalVarDecls->Items; + std::vector<const GlobalVariable*> Items = ExternalVarDecls; if (!Items.size()) return; O << "\n" << MAI->getCommentString() << "Imported Variables - BEGIN" << "\n"; @@ -299,7 +355,7 @@ void PIC16AsmPrinter::EmitUndefinedVars(Module &M) { // Emit variables defined in this module and are available to other modules. void PIC16AsmPrinter::EmitDefinedVars(Module &M) { - std::vector<const GlobalVariable*> Items = PTOF->ExternalVarDefs->Items; + std::vector<const GlobalVariable*> Items = ExternalVarDefs; if (!Items.size()) return; O << "\n" << MAI->getCommentString() << "Exported Variables - BEGIN" << "\n"; @@ -311,25 +367,12 @@ void PIC16AsmPrinter::EmitDefinedVars(Module &M) { // Emit initialized data placed in ROM. void PIC16AsmPrinter::EmitRomData(Module &M) { - // Print ROM Data section. - const std::vector<PIC16Section*> &ROSections = PTOF->ROSections; - for (unsigned i = 0; i < ROSections.size(); i++) { - const std::vector<const GlobalVariable*> &Items = ROSections[i]->Items; - if (!Items.size()) continue; - O << "\n"; - OutStreamer.SwitchSection(PTOF->ROSections[i]->S_); - for (unsigned j = 0; j < Items.size(); j++) { - O << Mang->getMangledName(Items[j]); - Constant *C = Items[j]->getInitializer(); - int AddrSpace = Items[j]->getType()->getAddressSpace(); - EmitGlobalConstant(C, AddrSpace); - } - } + EmitSingleSection(PTOF->ROMDATASection()); } bool PIC16AsmPrinter::doFinalization(Module &M) { + EmitAllAutos(M); printLibcallDecls(); - EmitRemainingAutos(); DbgInfo.EndModule(M); O << "\n\t" << "END\n"; return AsmPrinter::doFinalization(M); @@ -342,8 +385,10 @@ void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { // Emit the data section name. O << "\n"; - const MCSection *fPDataSection = - getObjFileLowering().getSectionForFunctionFrame(CurrentFnName); + PIC16Section *fPDataSection = const_cast<PIC16Section *>(getObjFileLowering(). + SectionForFrame(CurrentFnName)); + + fPDataSection->setColor(getFunctionColor(F)); OutStreamer.SwitchSection(fPDataSection); // Emit function frame label @@ -379,106 +424,86 @@ void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { O << PAN::getTempdataLabel(CurrentFnName) << " RES " << TempSize << '\n'; } -void PIC16AsmPrinter::EmitIData(Module &M) { - // Print all IDATA sections. - const std::vector<PIC16Section*> &IDATASections = PTOF->IDATASections; - for (unsigned i = 0; i < IDATASections.size(); i++) { - O << "\n"; - if (IDATASections[i]->S_->getName().find("llvm.") != std::string::npos) - continue; - OutStreamer.SwitchSection(IDATASections[i]->S_); - std::vector<const GlobalVariable*> Items = IDATASections[i]->Items; +void PIC16AsmPrinter::EmitInitializedDataSection(const PIC16Section *S) { + /// Emit Section header. + OutStreamer.SwitchSection(S); + + std::vector<const GlobalVariable*> Items = S->Items; for (unsigned j = 0; j < Items.size(); j++) { std::string Name = Mang->getMangledName(Items[j]); Constant *C = Items[j]->getInitializer(); int AddrSpace = Items[j]->getType()->getAddressSpace(); O << Name; EmitGlobalConstant(C, AddrSpace); - } - } + } } -void PIC16AsmPrinter::EmitUData(Module &M) { - const TargetData *TD = TM.getTargetData(); +// Print all IDATA sections. +void PIC16AsmPrinter::EmitIData(Module &M) { + EmitSectionList (M, PTOF->IDATASections()); +} - // Print all BSS sections. - const std::vector<PIC16Section*> &BSSSections = PTOF->BSSSections; - for (unsigned i = 0; i < BSSSections.size(); i++) { - O << "\n"; - OutStreamer.SwitchSection(BSSSections[i]->S_); - std::vector<const GlobalVariable*> Items = BSSSections[i]->Items; +void PIC16AsmPrinter:: +EmitUninitializedDataSection(const PIC16Section *S) { + const TargetData *TD = TM.getTargetData(); + OutStreamer.SwitchSection(S); + std::vector<const GlobalVariable*> Items = S->Items; for (unsigned j = 0; j < Items.size(); j++) { std::string Name = Mang->getMangledName(Items[j]); Constant *C = Items[j]->getInitializer(); const Type *Ty = C->getType(); unsigned Size = TD->getTypeAllocSize(Ty); - O << Name << " RES " << Size << "\n"; } - } } -void PIC16AsmPrinter::EmitAutos(std::string FunctName) { - // Section names for all globals are already set. - const TargetData *TD = TM.getTargetData(); +// Print all UDATA sections. +void PIC16AsmPrinter::EmitUData(Module &M) { + EmitSectionList (M, PTOF->UDATASections()); +} - // Now print Autos section for this function. - std::string SectionName = PAN::getAutosSectionName(FunctName); - const std::vector<PIC16Section*> &AutosSections = PTOF->AutosSections; - for (unsigned i = 0; i < AutosSections.size(); i++) { - O << "\n"; - if (AutosSections[i]->S_->getName() == SectionName) { - // Set the printing status to true - AutosSections[i]->setPrintedStatus(true); - OutStreamer.SwitchSection(AutosSections[i]->S_); - const std::vector<const GlobalVariable*> &Items = AutosSections[i]->Items; - for (unsigned j = 0; j < Items.size(); j++) { - std::string VarName = Mang->getMangledName(Items[j]); - Constant *C = Items[j]->getInitializer(); - const Type *Ty = C->getType(); - unsigned Size = TD->getTypeAllocSize(Ty); - // Emit memory reserve directive. - O << VarName << " RES " << Size << "\n"; - } - break; - } - } +// Print all USER sections. +void PIC16AsmPrinter::EmitUserSections(Module &M) { + EmitSectionList (M, PTOF->USERSections()); } -// Print autos that were not printed during the code printing of functions. -// As the functions might themselves would have got deleted by the optimizer. -void PIC16AsmPrinter::EmitRemainingAutos() { - const TargetData *TD = TM.getTargetData(); +// Print all AUTO sections. +void PIC16AsmPrinter::EmitAllAutos(Module &M) { + EmitSectionList (M, PTOF->AUTOSections()); +} - // Now print Autos section for this function. - std::vector <PIC16Section *>AutosSections = PTOF->AutosSections; - for (unsigned i = 0; i < AutosSections.size(); i++) { - - // if the section is already printed then don't print again - if (AutosSections[i]->isPrinted()) - continue; +extern "C" void LLVMInitializePIC16AsmPrinter() { + RegisterAsmPrinter<PIC16AsmPrinter> X(ThePIC16Target); +} - // Set status as printed - AutosSections[i]->setPrintedStatus(true); +// Emit one data section using correct section emitter based on section type. +void PIC16AsmPrinter::EmitSingleSection(const PIC16Section *S) { + if (S == NULL) return; - O << "\n"; - OutStreamer.SwitchSection(AutosSections[i]->S_); - const std::vector<const GlobalVariable*> &Items = AutosSections[i]->Items; - for (unsigned j = 0; j < Items.size(); j++) { - std::string VarName = Mang->getMangledName(Items[j]); - Constant *C = Items[j]->getInitializer(); - const Type *Ty = C->getType(); - unsigned Size = TD->getTypeAllocSize(Ty); - // Emit memory reserve directive. - O << VarName << " RES " << Size << "\n"; - } + switch (S->getType()) { + default: llvm_unreachable ("unknow user section type"); + case UDATA: + case UDATA_SHR: + case UDATA_OVR: + EmitUninitializedDataSection(S); + break; + case IDATA: + case ROMDATA: + EmitInitializedDataSection(S); + break; } } - -extern "C" void LLVMInitializePIC16AsmPrinter() { - RegisterAsmPrinter<PIC16AsmPrinter> X(ThePIC16Target); +// Emit a list of sections. +void PIC16AsmPrinter:: +EmitSectionList(Module &M, const std::vector<PIC16Section *> &SList) { + for (unsigned i = 0; i < SList.size(); i++) { + // Exclude llvm specific metadata sections. + if (SList[i]->getName().find("llvm.") != std::string::npos) + continue; + O << "\n"; + EmitSingleSection(SList[i]); + } } - diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index 2dd4600..b13d9ce 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -53,11 +53,17 @@ namespace llvm { void EmitDefinedVars (Module &M); void EmitIData (Module &M); void EmitUData (Module &M); - void EmitAutos (std::string FunctName); - void EmitRemainingAutos (); + void EmitAllAutos (Module &M); void EmitRomData (Module &M); + void EmitUserSections (Module &M); void EmitFunctionFrame(MachineFunction &MF); void printLibcallDecls(); + void EmitUninitializedDataSection(const PIC16Section *S); + void EmitInitializedDataSection(const PIC16Section *S); + void EmitSingleSection(const PIC16Section *S); + void EmitSectionList(Module &M, + const std::vector< PIC16Section *> &SList); + void ColorAutoSection(const Function *F); protected: bool doInitialization(Module &M); bool doFinalization(Module &M); @@ -74,6 +80,8 @@ namespace llvm { PIC16DbgInfo DbgInfo; const PIC16MCAsmInfo *PMAI; std::list<const char *> LibcallDecls; // List of extern decls. + std::vector<const GlobalVariable *> ExternalVarDecls; + std::vector<const GlobalVariable *> ExternalVarDefs; }; } // end of namespace diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt index 0ee88f9..208b067 100644 --- a/lib/Target/PIC16/CMakeLists.txt +++ b/lib/Target/PIC16/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(PIC16 PIC16MemSelOpt.cpp PIC16MCAsmInfo.cpp PIC16RegisterInfo.cpp + PIC16Section.cpp PIC16Subtarget.cpp PIC16TargetMachine.cpp PIC16TargetObjectFile.cpp diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile index f913675..4382eba7 100644 --- a/lib/Target/PIC16/Makefile +++ b/lib/Target/PIC16/Makefile @@ -17,7 +17,7 @@ BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \ PIC16GenDAGISel.inc PIC16GenCallingConv.inc \ PIC16GenSubtarget.inc -DIRS = AsmPrinter TargetInfo +DIRS = AsmPrinter TargetInfo PIC16Passes include $(LEVEL)/Makefile.common diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index 8a3704d..e46c9b2 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -42,266 +42,15 @@ namespace PIC16CC { UGE }; } - // A Central class to manage all ABI naming conventions. - // PAN - [P]ic16 [A]BI [N]ames - class PAN { - public: - // Map the name of the symbol to its section name. - // Current ABI: - // ----------------------------------------------------- - // ALL Names are prefixed with the symobl '@'. - // ------------------------------------------------------ - // Global variables do not have any '.' in their names. - // These are maily function names and global variable names. - // Example - @foo, @i - // ------------------------------------------------------- - // Functions and auto variables. - // Names are mangled as <prefix><funcname>.<tag>.<varname> - // Where <prefix> is '@' and <tag> is any one of - // the following - // .auto. - an automatic var of a function. - // .temp. - temproray data of a function. - // .ret. - return value label for a function. - // .frame. - Frame label for a function where retval, args - // and temps are stored. - // .args. - Label used to pass arguments to a direct call. - // Example - Function name: @foo - // Its frame: @foo.frame. - // Its retval: @foo.ret. - // Its local vars: @foo.auto.a - // Its temp data: @foo.temp. - // Its arg passing: @foo.args. - //---------------------------------------------- - // Libcall - compiler generated libcall names must start with .lib. - // This id will be used to emit extern decls for libcalls. - // Example - libcall name: @.lib.sra.i8 - // To pass args: @.lib.sra.i8.args. - // To return val: @.lib.sra.i8.ret. - //---------------------------------------------- - // SECTION Names - // uninitialized globals - @udata.<num>.# - // initialized globals - @idata.<num>.# - // Function frame - @<func>.frame_section. - // Function autos - @<func>.autos_section. - // Declarations - Enclosed in comments. No section for them. - //---------------------------------------------------------- - - // Tags used to mangle different names. - enum TAGS { - PREFIX_SYMBOL, - GLOBAL, - STATIC_LOCAL, - AUTOS_LABEL, - FRAME_LABEL, - RET_LABEL, - ARGS_LABEL, - TEMPS_LABEL, - - LIBCALL, - - FRAME_SECTION, - AUTOS_SECTION, - CODE_SECTION - }; - - // Textual names of the tags. - inline static const char *getTagName(TAGS tag) { - switch (tag) { - default: return ""; - case PREFIX_SYMBOL: return "@"; - case AUTOS_LABEL: return ".auto."; - case FRAME_LABEL: return ".frame."; - case TEMPS_LABEL: return ".temp."; - case ARGS_LABEL: return ".args."; - case RET_LABEL: return ".ret."; - case LIBCALL: return ".lib."; - case FRAME_SECTION: return ".frame_section."; - case AUTOS_SECTION: return ".autos_section."; - case CODE_SECTION: return ".code_section."; - } - } - - // Get tag type for the Symbol. - inline static TAGS getSymbolTag(const std::string &Sym) { - if (Sym.find(getTagName(TEMPS_LABEL)) != std::string::npos) - return TEMPS_LABEL; - - if (Sym.find(getTagName(FRAME_LABEL)) != std::string::npos) - return FRAME_LABEL; - - if (Sym.find(getTagName(RET_LABEL)) != std::string::npos) - return RET_LABEL; - - if (Sym.find(getTagName(ARGS_LABEL)) != std::string::npos) - return ARGS_LABEL; - - if (Sym.find(getTagName(AUTOS_LABEL)) != std::string::npos) - return AUTOS_LABEL; - - if (Sym.find(getTagName(LIBCALL)) != std::string::npos) - return LIBCALL; - - // It does not have any Tag. So its a true global or static local. - if (Sym.find(".") == std::string::npos) - return GLOBAL; - - // If a . is there, then it may be static local. - // We should mangle these as well in clang. - if (Sym.find(".") != std::string::npos) - return STATIC_LOCAL; - - assert (0 && "Could not determine Symbol's tag"); - return PREFIX_SYMBOL; // Silence warning when assertions are turned off. - } - - // addPrefix - add prefix symbol to a name if there isn't one already. - inline static std::string addPrefix (const std::string &Name) { - std::string prefix = getTagName (PREFIX_SYMBOL); - - // If this name already has a prefix, nothing to do. - if (Name.compare(0, prefix.size(), prefix) == 0) - return Name; - - return prefix + Name; - } - - // Get mangled func name from a mangled sym name. - // In all cases func name is the first component before a '.'. - static inline std::string getFuncNameForSym(const std::string &Sym1) { - assert (getSymbolTag(Sym1) != GLOBAL && "not belongs to a function"); - - std::string Sym = addPrefix(Sym1); - - // Position of the . after func name. That's where func name ends. - size_t func_name_end = Sym.find ('.'); - - return Sym.substr (0, func_name_end); - } - - // Get Frame start label for a func. - static std::string getFrameLabel(const std::string &Func) { - std::string Func1 = addPrefix(Func); - std::string tag = getTagName(FRAME_LABEL); - return Func1 + tag; - } - - static std::string getRetvalLabel(const std::string &Func) { - std::string Func1 = addPrefix(Func); - std::string tag = getTagName(RET_LABEL); - return Func1 + tag; - } - - static std::string getArgsLabel(const std::string &Func) { - std::string Func1 = addPrefix(Func); - std::string tag = getTagName(ARGS_LABEL); - return Func1 + tag; - } - - static std::string getTempdataLabel(const std::string &Func) { - std::string Func1 = addPrefix(Func); - std::string tag = getTagName(TEMPS_LABEL); - return Func1 + tag; - } - static std::string getFrameSectionName(const std::string &Func) { - std::string Func1 = addPrefix(Func); - std::string tag = getTagName(FRAME_SECTION); - return Func1 + tag + "# UDATA_OVR"; - } - - static std::string getAutosSectionName(const std::string &Func) { - std::string Func1 = addPrefix(Func); - std::string tag = getTagName(AUTOS_SECTION); - return Func1 + tag + "# UDATA_OVR"; - } - - static std::string getCodeSectionName(const std::string &Func) { - std::string Func1 = addPrefix(Func); - std::string tag = getTagName(CODE_SECTION); - return Func1 + tag + "# CODE"; - } - - // udata, romdata and idata section names are generated by a given number. - // @udata.<num>.# - static std::string getUdataSectionName(unsigned num, - std::string prefix = "") { - std::ostringstream o; - o << getTagName(PREFIX_SYMBOL) << prefix << "udata." << num - << ".# UDATA"; - return o.str(); - } - - static std::string getRomdataSectionName(unsigned num, - std::string prefix = "") { - std::ostringstream o; - o << getTagName(PREFIX_SYMBOL) << prefix << "romdata." << num - << ".# ROMDATA"; - return o.str(); - } - - static std::string getIdataSectionName(unsigned num, - std::string prefix = "") { - std::ostringstream o; - o << getTagName(PREFIX_SYMBOL) << prefix << "idata." << num - << ".# IDATA"; - return o.str(); - } - - inline static bool isLocalName (const std::string &Name) { - if (getSymbolTag(Name) == AUTOS_LABEL) - return true; - - return false; - } - - inline static bool isMemIntrinsic (const std::string &Name) { - if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 || - Name.compare("@memmove") == 0) { - return true; - } - - return false; - } - - inline static bool isLocalToFunc (std::string &Func, std::string &Var) { - if (! isLocalName(Var)) return false; - - std::string Func1 = addPrefix(Func); - // Extract func name of the varilable. - const std::string &fname = getFuncNameForSym(Var); - - if (fname.compare(Func1) == 0) - return true; - - return false; - } - - - // Get the section for the given external symbol names. - // This tries to find the type (Tag) of the symbol from its mangled name - // and return appropriate section name for it. - static inline std::string getSectionNameForSym(const std::string &Sym1) { - std::string Sym = addPrefix(Sym1); - - std::string SectionName; - - std::string Fname = getFuncNameForSym (Sym); - TAGS id = getSymbolTag (Sym); - - switch (id) { - default : assert (0 && "Could not determine external symbol type"); - case FRAME_LABEL: - case RET_LABEL: - case TEMPS_LABEL: - case ARGS_LABEL: { - return getFrameSectionName(Fname); - } - case AUTOS_LABEL: { - return getAutosSectionName(Fname); - } - } - } - }; // class PAN. + enum PIC16SectionType { + CODE, + UDATA, + IDATA, + ROMDATA, + UDATA_OVR, + UDATA_SHR + }; // External symbol names require memory to live till the program end. diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h new file mode 100644 index 0000000..7f4c2f1 --- /dev/null +++ b/lib/Target/PIC16/PIC16ABINames.h @@ -0,0 +1,325 @@ +//===-- PIC16ABINames.h - PIC16 Naming conventios for ABI----- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the functions to manage ABI Naming conventions for PIC16. +// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_PIC16ABINAMES_H +#define LLVM_TARGET_PIC16ABINAMES_H + +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" +#include <cassert> +#include <sstream> +#include <cstring> +#include <string> + +namespace llvm { + class PIC16TargetMachine; + class FunctionPass; + class MachineCodeEmitter; + class formatted_raw_ostream; + + // A Central class to manage all ABI naming conventions. + // PAN - [P]ic16 [A]BI [N]ames + class PAN { + public: + // Map the name of the symbol to its section name. + // Current ABI: + // ----------------------------------------------------- + // ALL Names are prefixed with the symobl '@'. + // ------------------------------------------------------ + // Global variables do not have any '.' in their names. + // These are maily function names and global variable names. + // Example - @foo, @i + // Static local variables - @<func>.<var> + // ------------------------------------------------------- + // Functions and auto variables. + // Names are mangled as <prefix><funcname>.<tag>.<varname> + // Where <prefix> is '@' and <tag> is any one of + // the following + // .auto. - an automatic var of a function. + // .temp. - temproray data of a function. + // .ret. - return value label for a function. + // .frame. - Frame label for a function where retval, args + // and temps are stored. + // .args. - Label used to pass arguments to a direct call. + // Example - Function name: @foo + // Its frame: @foo.frame. + // Its retval: @foo.ret. + // Its local vars: @foo.auto.a + // Its temp data: @foo.temp. + // Its arg passing: @foo.args. + //---------------------------------------------- + // Libcall - compiler generated libcall names must start with .lib. + // This id will be used to emit extern decls for libcalls. + // Example - libcall name: @.lib.sra.i8 + // To pass args: @.lib.sra.i8.args. + // To return val: @.lib.sra.i8.ret. + //---------------------------------------------- + // SECTION Names + // uninitialized globals - @udata.<num>.# + // initialized globals - @idata.<num>.# + // Program memory data - @romdata.# + // Variables with user defined section name - <user_defined_section> + // Variables with user defined address - @<var>.user_section.<address>.# + // Function frame - @<func>.frame_section. + // Function autos - @<func>.autos_section. + // Overlay sections - @<color>.## + // Declarations - Enclosed in comments. No section for them. + //---------------------------------------------------------- + + // Tags used to mangle different names. + enum TAGS { + PREFIX_SYMBOL, + GLOBAL, + STATIC_LOCAL, + AUTOS_LABEL, + FRAME_LABEL, + RET_LABEL, + ARGS_LABEL, + TEMPS_LABEL, + + LIBCALL, + + FRAME_SECTION, + AUTOS_SECTION, + CODE_SECTION, + USER_SECTION + }; + + // Textual names of the tags. + inline static const char *getTagName(TAGS tag) { + switch (tag) { + default: return ""; + case PREFIX_SYMBOL: return "@"; + case AUTOS_LABEL: return ".auto."; + case FRAME_LABEL: return ".frame."; + case TEMPS_LABEL: return ".temp."; + case ARGS_LABEL: return ".args."; + case RET_LABEL: return ".ret."; + case LIBCALL: return ".lib."; + case FRAME_SECTION: return ".frame_section."; + case AUTOS_SECTION: return ".autos_section."; + case CODE_SECTION: return ".code_section."; + case USER_SECTION: return ".user_section."; + } + } + + // Get tag type for the Symbol. + inline static TAGS getSymbolTag(const std::string &Sym) { + if (Sym.find(getTagName(TEMPS_LABEL)) != std::string::npos) + return TEMPS_LABEL; + + if (Sym.find(getTagName(FRAME_LABEL)) != std::string::npos) + return FRAME_LABEL; + + if (Sym.find(getTagName(RET_LABEL)) != std::string::npos) + return RET_LABEL; + + if (Sym.find(getTagName(ARGS_LABEL)) != std::string::npos) + return ARGS_LABEL; + + if (Sym.find(getTagName(AUTOS_LABEL)) != std::string::npos) + return AUTOS_LABEL; + + if (Sym.find(getTagName(LIBCALL)) != std::string::npos) + return LIBCALL; + + // It does not have any Tag. So its a true global or static local. + if (Sym.find(".") == std::string::npos) + return GLOBAL; + + // If a . is there, then it may be static local. + // We should mangle these as well in clang. + if (Sym.find(".") != std::string::npos) + return STATIC_LOCAL; + + assert (0 && "Could not determine Symbol's tag"); + return PREFIX_SYMBOL; // Silence warning when assertions are turned off. + } + + // addPrefix - add prefix symbol to a name if there isn't one already. + inline static std::string addPrefix (const std::string &Name) { + std::string prefix = getTagName (PREFIX_SYMBOL); + + // If this name already has a prefix, nothing to do. + if (Name.compare(0, prefix.size(), prefix) == 0) + return Name; + + return prefix + Name; + } + + // Get mangled func name from a mangled sym name. + // In all cases func name is the first component before a '.'. + static inline std::string getFuncNameForSym(const std::string &Sym1) { + assert (getSymbolTag(Sym1) != GLOBAL && "not belongs to a function"); + + std::string Sym = addPrefix(Sym1); + + // Position of the . after func name. That's where func name ends. + size_t func_name_end = Sym.find ('.'); + + return Sym.substr (0, func_name_end); + } + + // Get Frame start label for a func. + static std::string getFrameLabel(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(FRAME_LABEL); + return Func1 + tag; + } + + static std::string getRetvalLabel(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(RET_LABEL); + return Func1 + tag; + } + + static std::string getArgsLabel(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(ARGS_LABEL); + return Func1 + tag; + } + + static std::string getTempdataLabel(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(TEMPS_LABEL); + return Func1 + tag; + } + + static std::string getFrameSectionName(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(FRAME_SECTION); + return Func1 + tag + "#"; + } + + static std::string getAutosSectionName(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(AUTOS_SECTION); + return Func1 + tag + "#"; + } + + static std::string getCodeSectionName(const std::string &Func) { + std::string Func1 = addPrefix(Func); + std::string tag = getTagName(CODE_SECTION); + return Func1 + tag + "#"; + } + + static std::string getUserSectionName(const std::string &Name) { + std::string sname = addPrefix(Name);; + std::string tag = getTagName(USER_SECTION); + return sname + tag + "#"; + } + + // udata, romdata and idata section names are generated by a given number. + // @udata.<num>.# + static std::string getUdataSectionName(unsigned num, + std::string prefix = "") { + std::ostringstream o; + o << getTagName(PREFIX_SYMBOL) << prefix << "udata." << num + << ".#"; + return o.str(); + } + + static std::string getRomdataSectionName() { + return "romdata.#"; + } + + static std::string getRomdataSectionName(unsigned num, + std::string prefix = "") { + std::ostringstream o; + o << getTagName(PREFIX_SYMBOL) << prefix << "romdata." << num + << ".#"; + return o.str(); + } + + static std::string getIdataSectionName(unsigned num, + std::string prefix = "") { + std::ostringstream o; + o << getTagName(PREFIX_SYMBOL) << prefix << "idata." << num + << ".#"; + return o.str(); + } + + inline static bool isLocalName (const std::string &Name) { + if (getSymbolTag(Name) == AUTOS_LABEL) + return true; + + return false; + } + + inline static bool isMemIntrinsic (const std::string &Name) { + if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 || + Name.compare("@memmove") == 0) { + return true; + } + + return false; + } + + inline static bool isLocalToFunc (std::string &Func, std::string &Var) { + if (! isLocalName(Var)) return false; + + std::string Func1 = addPrefix(Func); + // Extract func name of the varilable. + const std::string &fname = getFuncNameForSym(Var); + + if (fname.compare(Func1) == 0) + return true; + + return false; + } + + + // Get the section for the given external symbol names. + // This tries to find the type (Tag) of the symbol from its mangled name + // and return appropriate section name for it. + static inline std::string getSectionNameForSym(const std::string &Sym1) { + std::string Sym = addPrefix(Sym1); + + std::string SectionName; + + std::string Fname = getFuncNameForSym (Sym); + TAGS id = getSymbolTag (Sym); + + switch (id) { + default : assert (0 && "Could not determine external symbol type"); + case FRAME_LABEL: + case RET_LABEL: + case TEMPS_LABEL: + case ARGS_LABEL: { + return getFrameSectionName(Fname); + } + case AUTOS_LABEL: { + return getAutosSectionName(Fname); + } + } + } + + /// Return Overlay Name for the section. + /// The ABI Convention is: @<Color>.##.<section_tag> + /// The section_tag is retrieved from the SectName parameter and + /// and Color is passed in parameter. + static inline std::string getOverlayName(std::string SectName, int Color) { + // FIXME: Only autos_section and frame_section are colored. + // So check and assert if the passed SectName does not have AUTOS_SECTION + // or FRAME_SECTION tag in it. + std::ostringstream o; + o << getTagName(PREFIX_SYMBOL) << Color << ".##" + << SectName.substr(SectName.find(".")); + + return o.str(); + } + }; // class PAN. +} // end namespace llvm; + +#endif diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 961caed..0ed44d2 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "PIC16.h" +#include "PIC16ABINames.h" #include "PIC16DebugInfo.h" #include "llvm/GlobalVariable.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,10 +31,10 @@ void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, std::string &TagName) { if (Ty.isBasicType()) PopulateBasicTypeInfo (Ty, TypeNo); - else if (Ty.isDerivedType()) - PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); else if (Ty.isCompositeType()) PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); + else if (Ty.isDerivedType()) + PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); else { TypeNo = PIC16Dbg::T_NULL; HasAux = false; @@ -190,7 +191,7 @@ unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) { /// short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) { short ClassNo; - if (PAN::isLocalName(DIGV.getGlobal()->getName())) { + if (PAN::isLocalName(DIGV.getName())) { // Generating C_AUTO here fails due to error in linker. Change it once // linker is fixed. ClassNo = PIC16Dbg::C_STAT; @@ -446,7 +447,8 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { bool HasAux = false; int Aux[PIC16Dbg::AuxSize] = { 0 }; std::string TagName = ""; - std::string VarName = MAI->getGlobalPrefix()+DIGV.getGlobal()->getNameStr(); + std::string VarName = DIGV.getName(); + VarName = MAI->getGlobalPrefix() + VarName; PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName); // Emit debug info only if type information is availaible. if (TypeNo != PIC16Dbg::T_NULL) { diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index bf986b1..635befe 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pic16-lower" +#include "PIC16ABINames.h" #include "PIC16ISelLowering.h" #include "PIC16TargetObjectFile.h" #include "PIC16TargetMachine.h" diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index cb0c41b..87bd3d9 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PIC16.h" +#include "PIC16ABINames.h" #include "PIC16InstrInfo.h" #include "PIC16TargetMachine.h" #include "PIC16GenInstrInfo.inc" diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp index a17d1a8..827315e 100644 --- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp +++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp @@ -16,6 +16,7 @@ // FIXME: Layering violation to get enums and static function, should be moved // to separate headers. #include "PIC16.h" +#include "PIC16ABINames.h" #include "PIC16ISelLowering.h" using namespace llvm; diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index c9ebb57..a97dc35 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -21,6 +21,7 @@ #define DEBUG_TYPE "pic16-codegen" #include "PIC16.h" +#include "PIC16ABINames.h" #include "PIC16InstrInfo.h" #include "PIC16MCAsmInfo.h" #include "PIC16TargetMachine.h" diff --git a/lib/Target/PIC16/PIC16Passes/Makefile b/lib/Target/PIC16/PIC16Passes/Makefile new file mode 100644 index 0000000..cbb34b3 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/Makefile @@ -0,0 +1,17 @@ +##===- lib/Target/PIC16/PIC16Passes/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +TARGET = PIC16 +LIBRARYNAME = LLVMpic16passes +BUILD_ARCHIVE = 1 + + + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp new file mode 100644 index 0000000..197c398 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp @@ -0,0 +1,181 @@ +//===-- PIC16Overlay.cpp - Implementation for PIC16 Frame Overlay===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PIC16 Frame Overlay implementation. +// +//===----------------------------------------------------------------------===// + + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/Value.h" +#include "PIC16Overlay.h" +#include "llvm/Function.h" +#include <cstdlib> +#include <sstream> +using namespace llvm; + +namespace llvm { + char PIC16FrameOverlay::ID = 0; + ModulePass *createPIC16OverlayPass() { return new PIC16FrameOverlay(); } +} + +void PIC16FrameOverlay::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<CallGraph>(); +} + +void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { + // Do not set any color for external calling node. + if (Depth != 0 && CGN->getFunction()) { + unsigned Color = getColor(CGN->getFunction()); + + // Handle indirectly called functions + if (Color >= PIC16Overlay::StartIndirectCallColor || + Depth >= PIC16Overlay::StartIndirectCallColor) { + // All functions called from an indirectly called function are given + // an unique color. + if (Color < PIC16Overlay::StartIndirectCallColor && + Depth >= PIC16Overlay::StartIndirectCallColor) + setColor(CGN->getFunction(), Depth); + + for (unsigned int i = 0; i < CGN->size(); i++) + DFSTraverse((*CGN)[i], ++IndirectCallColor); + return; + } + // Just return if the node already has a color greater than the current + // depth. A node must be colored with the maximum depth that it has. + if (Color >= Depth) + return; + + Depth = ModifyDepthForInterrupt(CGN, Depth); + setColor(CGN->getFunction(), Depth); + } + + // Color all children of this node with color depth+1. + for (unsigned int i = 0; i < CGN->size(); i++) + DFSTraverse((*CGN)[i], Depth+1); +} + +unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN, + unsigned Depth) { + Function *Fn = CGN->getFunction(); + + // Return original Depth if function or section for function do not exist. + if (!Fn || !Fn->hasSection()) + return Depth; + + // Return original Depth if this function is not marked as interrupt. + if (Fn->getSection().find("interrupt") == string::npos) + return Depth; + + Depth = Depth + InterruptDepth; + return Depth; +} + +void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) { + std::string Section = ""; + if (Fn->hasSection()) + Section = Fn->getSection(); + + size_t Pos = Section.find(OverlayStr); + + // Convert Color to string. + std::stringstream ss; + ss << Color; + std::string ColorString = ss.str(); + + // If color is already set then reset it with the new value. Else append + // the Color string to section. + if (Pos != std::string::npos) { + Pos += OverlayStr.length(); + char c = Section.at(Pos); + unsigned OldColorLength = 0; + while (c >= '0' && c<= '9') { + OldColorLength++; + if (Pos < Section.length() - 1) + Pos++; + else + break; + c = Section.at(Pos); + } + // Replace old color with new one. + Section.replace(Pos-OldColorLength +1, OldColorLength, ColorString); + } + else { + // Append Color information to section string. + if (Fn->hasSection()) + Section.append(" "); + Section.append(OverlayStr + ColorString); + } + Fn->setSection(Section); +} + +unsigned PIC16FrameOverlay::getColor(Function *Fn) { + int Color = 0; + if (!Fn->hasSection()) + return 0; + + std::string Section = Fn->getSection(); + size_t Pos = Section.find(OverlayStr); + + // Return 0 if Color is not set. + if (Pos == std::string::npos) + return 0; + + // Set Pos to after "Overlay=". + Pos += OverlayStr.length(); + char c = Section.at(Pos); + std::string ColorString = ""; + + // Find the string representing Color. A Color can only consist of digits. + while (c >= '0' && c<= '9') { + ColorString.append(1,c); + if (Pos < Section.length() - 1) + Pos++; + else + break; + c = Section.at(Pos); + } + Color = atoi(ColorString.c_str()); + + return Color; +} + +bool PIC16FrameOverlay::runOnModule(Module &M) { + CallGraph &CG = getAnalysis<CallGraph>(); + CallGraphNode *ECN = CG.getExternalCallingNode(); + + MarkIndirectlyCalledFunctions(M); + // Since External Calling Node is the base function, do a depth first + // traversal of CallGraph with ECN as root. Each node with be marked with + // a color that is max(color(callers)) + 1. + if(ECN) { + DFSTraverse(ECN, 0); + } + return false; +} + +void PIC16FrameOverlay::MarkIndirectlyCalledFunctions(Module &M) { + // If the use of a function is not a call instruction then this + // function might be called indirectly. In that case give it + // an unique color. + for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) { + for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E; + ++I) { + if ((!isa<CallInst>(I) && !isa<InvokeInst>(I)) + || !CallSite(cast<Instruction>(I)).isCallee(I)) { + setColor(MI, ++IndirectCallColor); + break; + } + } + } +} diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h new file mode 100644 index 0000000..d70c4e7 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h @@ -0,0 +1,55 @@ +//===-- PIC16FrameOverlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PIC16 Overlay infrastructure. +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16FRAMEOVERLAY_H +#define PIC16FRAMEOVERLAY_H + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Pass.h" +#include "llvm/CallGraphSCCPass.h" + +using std::string; +using namespace llvm; + +namespace llvm { + namespace PIC16Overlay { + enum OverlayConsts { + StartInterruptColor = 200, + StartIndirectCallColor = 300 + }; + } + class PIC16FrameOverlay : public ModulePass { + std::string OverlayStr; + unsigned InterruptDepth; + unsigned IndirectCallColor; + public: + static char ID; // Class identification + PIC16FrameOverlay() : ModulePass(&ID) { + OverlayStr = "Overlay="; + InterruptDepth = PIC16Overlay::StartInterruptColor; + IndirectCallColor = PIC16Overlay::StartIndirectCallColor; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnModule(Module &M); + + private: + unsigned getColor(Function *Fn); + void setColor(Function *Fn, unsigned Color); + unsigned ModifyDepthForInterrupt(CallGraphNode *CGN, unsigned Depth); + void MarkIndirectlyCalledFunctions(Module &M); + void DFSTraverse(CallGraphNode *CGN, unsigned Depth); + }; +} // End of namespace + +#endif diff --git a/lib/Target/PIC16/PIC16Section.cpp b/lib/Target/PIC16/PIC16Section.cpp new file mode 100644 index 0000000..a96ebb8 --- /dev/null +++ b/lib/Target/PIC16/PIC16Section.cpp @@ -0,0 +1,96 @@ +//===-- PIC16Section.cpp - PIC16 Section ----------- --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PIC16.h" +#include "PIC16ABINames.h" +#include "PIC16Section.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + + +// This is the only way to create a PIC16Section. Sections created here +// do not need to be explicitly deleted as they are managed by auto_ptrs. +PIC16Section *PIC16Section::Create(const StringRef &Name, + PIC16SectionType Ty, + const std::string &Address, + int Color, MCContext &Ctx) { + + /// Determine the internal SectionKind info. + /// Users of PIC16Section class should not need to know the internal + /// SectionKind. They should work only with PIC16SectionType. + /// + /// PIC16 Terminology for section kinds is as below. + /// UDATA - BSS + /// IDATA - initialized data (equiv to Metadata) + /// ROMDATA - ReadOnly. + /// UDATA_OVR - Sections that can be overlaid. Section of such type is + /// used to contain function autos an frame. We can think of + /// it as equiv to llvm ThreadBSS) + /// UDATA_SHR - Shared RAM. Memory area that is mapped to all banks. + + SectionKind K; + switch (Ty) { + default: llvm_unreachable ("can not create unknown section type"); + case UDATA_OVR: { + K = SectionKind::getThreadBSS(); + break; + } + case UDATA_SHR: + case UDATA: { + K = SectionKind::getBSS(); + break; + } + case ROMDATA: + case IDATA: { + K = SectionKind::getMetadata(); + break; + } + case CODE: { + K = SectionKind::getText(); + break; + } + + } + + // Create the Section. + PIC16Section *S = new (Ctx) PIC16Section(Name, K, Address, Color); + S->T = Ty; + return S; +} + +// A generic way to print all types of sections. +void PIC16Section::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + // If the section is overlaid(i.e. it has a color), print overlay name for + // it. Otherwise print its normal name. + if (Color != -1) + OS << PAN::getOverlayName(getName(), Color) << '\t'; + else + OS << getName() << '\t'; + + // Print type. + switch (getType()) { + default : llvm_unreachable ("unknown section type"); + case UDATA: OS << "UDATA"; break; + case IDATA: OS << "IDATA"; break; + case ROMDATA: OS << "ROMDATA"; break; + case UDATA_SHR: OS << "UDATA_SHR"; break; + case UDATA_OVR: OS << "UDATA_OVR"; break; + case CODE: OS << "CODE"; break; + } + + OS << '\t'; + + // Print Address. + OS << Address; + + OS << '\n'; +} diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h new file mode 100644 index 0000000..3a8bbfb --- /dev/null +++ b/lib/Target/PIC16/PIC16Section.h @@ -0,0 +1,92 @@ +//===- PIC16Section.h - PIC16-specific section representation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PIC16Section class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PIC16SECTION_H +#define LLVM_PIC16SECTION_H + +#include "llvm/MC/MCSection.h" +#include "llvm/GlobalVariable.h" +#include <vector> + +namespace llvm { + /// PIC16Section - Represents a physical section in PIC16 COFF. + /// Contains data objects. + /// + class PIC16Section : public MCSection { + /// PIC16 Sections does not really use the SectionKind class to + /// to distinguish between various types of sections. PIC16 maintain + /// its own Section Type info. See the PIC16SectionType enum in PIC16.h + /// for various section types. + PIC16SectionType T; + + /// Name of the section to uniquely identify it. + std::string Name; + + /// User can specify an address at which a section should be placed. + /// Negative value here means user hasn't specified any. + std::string Address; + + /// Overlay information - Sections with same color can be overlaid on + /// one another. + int Color; + + /// Total size of all data objects contained here. + unsigned Size; + + PIC16Section(const StringRef &name, SectionKind K, const std::string &addr, + int color) + : MCSection(K), Name(name), Address(addr), Color(color) { + } + + public: + /// Return the name of the section. + const std::string &getName() const { return Name; } + + /// Return the Address of the section. + const std::string &getAddress() const { return Address; } + + /// Return the Color of the section. + int getColor() const { return Color; } + void setColor(int color) { Color = color; } + + /// Return the size of the section. + unsigned getSize() const { return Size; } + void setSize(unsigned size) { Size = size; } + + /// Conatined data objects. + std::vector<const GlobalVariable *>Items; + + /// Check section type. + bool isUDATA_Type() const { return T == UDATA; } + bool isIDATA_Type() const { return T == IDATA; } + bool isROMDATA_Type() const { return T == ROMDATA; } + bool isUDATA_OVR_Type() const { return T == UDATA_OVR; } + bool isUDATA_SHR_Type() const { return T == UDATA_SHR; } + bool isCODE_Type() const { return T == CODE; } + + PIC16SectionType getType() const { return T; } + + /// This would be the only way to create a section. + static PIC16Section *Create(const StringRef &Name, PIC16SectionType Ty, + const std::string &Address, int Color, + MCContext &Ctx); + + /// Override this as PIC16 has its own way of printing switching + /// to a section. + virtual void PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const; + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index a2a4c09..7eedf7f 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "PIC16TargetObjectFile.h" -#include "MCSectionPIC16.h" #include "PIC16ISelLowering.h" #include "PIC16TargetMachine.h" +#include "PIC16Section.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/MC/MCSection.h" @@ -19,75 +19,118 @@ using namespace llvm; -MCSectionPIC16 *MCSectionPIC16::Create(const StringRef &Name, SectionKind K, - int Address, int Color, MCContext &Ctx) { - return new (Ctx) MCSectionPIC16(Name, K, Address, Color); +PIC16TargetObjectFile::PIC16TargetObjectFile() { } +PIC16TargetObjectFile::~PIC16TargetObjectFile() { +} -void MCSectionPIC16::PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const { - OS << getName() << '\n'; +/// Find a pic16 section. Return null if not found. Do not create one. +PIC16Section *PIC16TargetObjectFile:: +findPIC16Section(const std::string &Name) { + /// Return if we have an already existing one. + PIC16Section *Entry = SectionsByName[Name]; + if (Entry) + return Entry; + + return NULL; } +/// Find a pic16 section. If not found, create one. +PIC16Section *PIC16TargetObjectFile:: +getPIC16Section(const std::string &Name, PIC16SectionType Ty, + const std::string &Address, int Color) const { + /// Return if we have an already existing one. + PIC16Section *&Entry = SectionsByName[Name]; + if (Entry) + return Entry; -PIC16TargetObjectFile::PIC16TargetObjectFile() - : ExternalVarDecls(0), ExternalVarDefs(0) { + + Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext()); + return Entry; } -const MCSectionPIC16 *PIC16TargetObjectFile:: -getPIC16Section(const char *Name, SectionKind Kind, - int Address, int Color) const { - MCSectionPIC16 *&Entry = SectionsByName[Name]; +/// Find a standard pic16 data section. If not found, create one and keep +/// track of it by adding it to appropriate std section list. +PIC16Section *PIC16TargetObjectFile:: +getPIC16DataSection(const std::string &Name, PIC16SectionType Ty, + const std::string &Address, int Color) const { + + /// Return if we have an already existing one. + PIC16Section *&Entry = SectionsByName[Name]; if (Entry) return Entry; - return Entry = MCSectionPIC16::Create(Name, Kind, Address, Color, - getContext()); -} + /// Else create a new one and add it to appropriate section list. + Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext()); -void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){ - TargetLoweringObjectFile::Initialize(Ctx, tm); - TM = &tm; - - BSSSection = getPIC16Section("udata.# UDATA", MCSectionPIC16::UDATA_Kind()); - ReadOnlySection = getPIC16Section("romdata.# ROMDATA", - MCSectionPIC16::ROMDATA_Kind()); - DataSection = getPIC16Section("idata.# IDATA", MCSectionPIC16::IDATA_Kind()); - - // Need because otherwise a .text symbol is emitted by DwarfWriter - // in BeginModule, and gpasm cribbs for that .text symbol. - TextSection = getPIC16Section("", SectionKind::getText()); + switch (Ty) { + default: llvm_unreachable ("unknow standard section type."); + case UDATA: UDATASections_.push_back(Entry); break; + case IDATA: IDATASections_.push_back(Entry); break; + case ROMDATA: ROMDATASection_ = Entry; break; + } - ROSections.push_back(new PIC16Section((MCSectionPIC16*)ReadOnlySection)); - - // FIXME: I don't know what the classification of these sections really is. - // These aren't really objects belonging to any section. Just emit them - // in AsmPrinter and remove this code from here. - ExternalVarDecls = new PIC16Section(getPIC16Section("ExternalVarDecls", - SectionKind::getMetadata())); - ExternalVarDefs = new PIC16Section(getPIC16Section("ExternalVarDefs", - SectionKind::getMetadata())); + return Entry; } + -const MCSection *PIC16TargetObjectFile:: -getSectionForFunction(const std::string &FnName) const { - std::string T = PAN::getCodeSectionName(FnName); - return getPIC16Section(T.c_str(), SectionKind::getText()); +/// Find a standard pic16 autos section. If not found, create one and keep +/// track of it by adding it to appropriate std section list. +PIC16Section *PIC16TargetObjectFile:: +getPIC16AutoSection(const std::string &Name, PIC16SectionType Ty, + const std::string &Address, int Color) const { + + /// Return if we have an already existing one. + PIC16Section *&Entry = SectionsByName[Name]; + if (Entry) + return Entry; + + + /// Else create a new one and add it to appropriate section list. + Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext()); + + assert (Ty == UDATA_OVR && "incorrect section type for autos"); + AUTOSections_.push_back(Entry); + + return Entry; } + +/// Find a pic16 user section. If not found, create one and keep +/// track of it by adding it to appropriate std section list. +PIC16Section *PIC16TargetObjectFile:: +getPIC16UserSection(const std::string &Name, PIC16SectionType Ty, + const std::string &Address, int Color) const { + + /// Return if we have an already existing one. + PIC16Section *&Entry = SectionsByName[Name]; + if (Entry) + return Entry; -const MCSection *PIC16TargetObjectFile:: -getSectionForFunctionFrame(const std::string &FnName) const { - std::string T = PAN::getFrameSectionName(FnName); - return getPIC16Section(T.c_str(), SectionKind::getDataRel()); + /// Else create a new one and add it to appropriate section list. + Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext()); + + USERSections_.push_back(Entry); + + return Entry; } +/// Do some standard initialization. +void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){ + TargetLoweringObjectFile::Initialize(Ctx, tm); + TM = &tm; + + ROMDATASection_ = NULL; +} + +/// allocateUDATA - Allocate a un-initialized global to an existing or new UDATA +/// section and return that section. const MCSection * -PIC16TargetObjectFile::getBSSSectionForGlobal(const GlobalVariable *GV) const { +PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const { assert(GV->hasInitializer() && "This global doesn't need space"); Constant *C = GV->getInitializer(); assert(C->isNullValue() && "Unitialized globals has non-zero initializer"); @@ -97,41 +140,37 @@ PIC16TargetObjectFile::getBSSSectionForGlobal(const GlobalVariable *GV) const { const Type *Ty = C->getType(); unsigned ValSize = TD->getTypeAllocSize(Ty); - // Go through all BSS Sections and assign this variable + // Go through all UDATA Sections and assign this variable // to the first available section having enough space. - PIC16Section *FoundBSS = NULL; - for (unsigned i = 0; i < BSSSections.size(); i++) { - if (DataBankSize - BSSSections[i]->Size >= ValSize) { - FoundBSS = BSSSections[i]; + PIC16Section *Found = NULL; + for (unsigned i = 0; i < UDATASections_.size(); i++) { + if (DataBankSize - UDATASections_[i]->getSize() >= ValSize) { + Found = UDATASections_[i]; break; } } - // No BSS section spacious enough was found. Crate a new one. - if (!FoundBSS) { - std::string name = PAN::getUdataSectionName(BSSSections.size()); - const MCSectionPIC16 *NewSection - = getPIC16Section(name.c_str(), MCSectionPIC16::UDATA_Kind()); - - FoundBSS = new PIC16Section(NewSection); - - // Add this newly created BSS section to the list of BSSSections. - BSSSections.push_back(FoundBSS); + // No UDATA section spacious enough was found. Crate a new one. + if (!Found) { + std::string name = PAN::getUdataSectionName(UDATASections_.size()); + Found = getPIC16DataSection(name.c_str(), UDATA); } - // Insert the GV into this BSS. - FoundBSS->Items.push_back(GV); - FoundBSS->Size += ValSize; - return FoundBSS->S_; + // Insert the GV into this UDATA section. + Found->Items.push_back(GV); + Found->setSize(Found->getSize() + ValSize); + return Found; } +/// allocateIDATA - allocate an initialized global into an existing +/// or new section and return that section. const MCSection * -PIC16TargetObjectFile::getIDATASectionForGlobal(const GlobalVariable *GV) const{ +PIC16TargetObjectFile::allocateIDATA(const GlobalVariable *GV) const{ assert(GV->hasInitializer() && "This global doesn't need space"); Constant *C = GV->getInitializer(); assert(!C->isNullValue() && "initialized globals has zero initializer"); assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE && - "can split initialized RAM data only"); + "can allocate initialized RAM data only"); // Find how much space this global needs. const TargetData *TD = TM->getTargetData(); @@ -140,64 +179,47 @@ PIC16TargetObjectFile::getIDATASectionForGlobal(const GlobalVariable *GV) const{ // Go through all IDATA Sections and assign this variable // to the first available section having enough space. - PIC16Section *FoundIDATA = NULL; - for (unsigned i = 0; i < IDATASections.size(); i++) { - if (DataBankSize - IDATASections[i]->Size >= ValSize) { - FoundIDATA = IDATASections[i]; + PIC16Section *Found = NULL; + for (unsigned i = 0; i < IDATASections_.size(); i++) { + if (DataBankSize - IDATASections_[i]->getSize() >= ValSize) { + Found = IDATASections_[i]; break; } } // No IDATA section spacious enough was found. Crate a new one. - if (!FoundIDATA) { - std::string name = PAN::getIdataSectionName(IDATASections.size()); - const MCSectionPIC16 *NewSection = - getPIC16Section(name.c_str(), MCSectionPIC16::IDATA_Kind()); - - FoundIDATA = new PIC16Section(NewSection); - - // Add this newly created IDATA section to the list of IDATASections. - IDATASections.push_back(FoundIDATA); + if (!Found) { + std::string name = PAN::getIdataSectionName(IDATASections_.size()); + Found = getPIC16DataSection(name.c_str(), IDATA); } // Insert the GV into this IDATA. - FoundIDATA->Items.push_back(GV); - FoundIDATA->Size += ValSize; - return FoundIDATA->S_; + Found->Items.push_back(GV); + Found->setSize(Found->getSize() + ValSize); + return Found; } -// Get the section for an automatic variable of a function. -// For PIC16 they are globals only with mangled names. +// Allocate a program memory variable into ROMDATA section. const MCSection * -PIC16TargetObjectFile::getSectionForAuto(const GlobalVariable *GV) const { +PIC16TargetObjectFile::allocateROMDATA(const GlobalVariable *GV) const { - const std::string name = PAN::getSectionNameForSym(GV->getName()); + std::string name = PAN::getRomdataSectionName(); + PIC16Section *S = getPIC16DataSection(name.c_str(), ROMDATA); - // Go through all Auto Sections and assign this variable - // to the appropriate section. - PIC16Section *FoundAutoSec = NULL; - for (unsigned i = 0; i < AutosSections.size(); i++) { - if (AutosSections[i]->S_->getName() == name) { - FoundAutoSec = AutosSections[i]; - break; - } - } - - // No Auto section was found. Crate a new one. - if (!FoundAutoSec) { - const MCSectionPIC16 *NewSection = - getPIC16Section(name.c_str(), MCSectionPIC16::UDATA_OVR_Kind()); - - FoundAutoSec = new PIC16Section(NewSection); + S->Items.push_back(GV); + return S; +} - // Add this newly created autos section to the list of AutosSections. - AutosSections.push_back(FoundAutoSec); - } +// Get the section for an automatic variable of a function. +// For PIC16 they are globals only with mangled names. +const MCSection * +PIC16TargetObjectFile::allocateAUTO(const GlobalVariable *GV) const { - // Insert the auto into this section. - FoundAutoSec->Items.push_back(GV); + const std::string name = PAN::getSectionNameForSym(GV->getName()); + PIC16Section *S = getPIC16AutoSection(name.c_str()); - return FoundAutoSec->S_; + S->Items.push_back(GV); + return S; } @@ -214,56 +236,35 @@ PIC16TargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV1, if (!GV) return TargetLoweringObjectFile::SelectSectionForGlobal(GV1, Kind, Mang,TM); - // Record External Var Decls. - if (GV->isDeclaration()) { - ExternalVarDecls->Items.push_back(GV); - return ExternalVarDecls->S_; - } - assert(GV->hasInitializer() && "A def without initializer?"); // First, if this is an automatic variable for a function, get the section // name for it and return. std::string name = GV->getName(); if (PAN::isLocalName(name)) - return getSectionForAuto(GV); - - // Record Exteranl Var Defs. - if (GV->hasExternalLinkage() || GV->hasCommonLinkage()) - ExternalVarDefs->Items.push_back(GV); + return allocateAUTO(GV); // See if this is an uninitialized global. const Constant *C = GV->getInitializer(); if (C->isNullValue()) - return getBSSSectionForGlobal(GV); + return allocateUDATA(GV); // If this is initialized data in RAM. Put it in the correct IDATA section. if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) - return getIDATASectionForGlobal(GV); + return allocateIDATA(GV); // This is initialized data in rom, put it in the readonly section. if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) - return getROSectionForGlobal(GV); + return allocateROMDATA(GV); // Else let the default implementation take care of it. return TargetLoweringObjectFile::SelectSectionForGlobal(GV, Kind, Mang,TM); } -PIC16TargetObjectFile::~PIC16TargetObjectFile() { - for (unsigned i = 0; i < BSSSections.size(); i++) - delete BSSSections[i]; - for (unsigned i = 0; i < IDATASections.size(); i++) - delete IDATASections[i]; - for (unsigned i = 0; i < AutosSections.size(); i++) - delete AutosSections[i]; - for (unsigned i = 0; i < ROSections.size(); i++) - delete ROSections[i]; - delete ExternalVarDecls; - delete ExternalVarDefs; -} -/// getSpecialCasedSectionGlobals - Allow the target to completely override + +/// getExplicitSectionGlobal - Allow the target to completely override /// section assignment of a global. const MCSection *PIC16TargetObjectFile:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, @@ -274,167 +275,83 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, std::string SectName = GVar->getSection(); // If address for a variable is specified, get the address and create // section. + // FIXME: move this attribute checking in PAN. std::string AddrStr = "Address="; if (SectName.compare(0, AddrStr.length(), AddrStr) == 0) { std::string SectAddr = SectName.substr(AddrStr.length()); - return CreateSectionForGlobal(GVar, Mang, SectAddr); + return allocateAtGivenAddress(GVar, SectAddr); } // Create the section specified with section attribute. - return CreateSectionForGlobal(GVar, Mang); + return allocateInGivenSection(GVar); } - return getPIC16Section(GV->getSection().c_str(), Kind); + return getPIC16DataSection(GV->getSection().c_str(), UDATA); +} + +// Interface used by AsmPrinter to get a code section for a function. +const PIC16Section * +PIC16TargetObjectFile::SectionForCode(const std::string &FnName) const { + const std::string &sec_name = PAN::getCodeSectionName(FnName); + return getPIC16Section(sec_name, CODE); +} + +// Interface used by AsmPrinter to get a frame section for a function. +const PIC16Section * +PIC16TargetObjectFile::SectionForFrame(const std::string &FnName) const { + const std::string &sec_name = PAN::getFrameSectionName(FnName); + return getPIC16Section(sec_name, UDATA_OVR); } -// Create a new section for global variable. If Addr is given then create -// section at that address else create by name. +// Allocate a global var in existing or new section of given name. const MCSection * -PIC16TargetObjectFile::CreateSectionForGlobal(const GlobalVariable *GV, - Mangler *Mang, - const std::string &Addr) const { +PIC16TargetObjectFile::allocateInGivenSection(const GlobalVariable *GV) const { + // Determine the type of section that we need to create. + PIC16SectionType SecTy; + // See if this is an uninitialized global. const Constant *C = GV->getInitializer(); if (C->isNullValue()) - return CreateBSSSectionForGlobal(GV, Addr); - + SecTy = UDATA; // If this is initialized data in RAM. Put it in the correct IDATA section. - if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) - return CreateIDATASectionForGlobal(GV, Addr); - + else if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) + SecTy = IDATA; // This is initialized data in rom, put it in the readonly section. - if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) - return CreateROSectionForGlobal(GV, Addr); - - // Else let the default implementation take care of it. - return TargetLoweringObjectFile::SectionForGlobal(GV, Mang, *TM); + else if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) + SecTy = ROMDATA; + else + llvm_unreachable ("Could not determine section type for global"); + + PIC16Section *S = getPIC16UserSection(GV->getSection().c_str(), SecTy); + S->Items.push_back(GV); + return S; } -// Create uninitialized section for a variable. +// Allocate a global var in a new absolute sections at given address. const MCSection * -PIC16TargetObjectFile::CreateBSSSectionForGlobal(const GlobalVariable *GV, - std::string Addr) const { - assert(GV->hasInitializer() && "This global doesn't need space"); - assert(GV->getInitializer()->isNullValue() && - "Unitialized global has non-zero initializer"); - std::string Name; - // If address is given then create a section at that address else create a - // section by section name specified in GV. - PIC16Section *FoundBSS = NULL; - if (Addr.empty()) { - Name = GV->getSection() + " UDATA"; - for (unsigned i = 0; i < BSSSections.size(); i++) { - if (BSSSections[i]->S_->getName() == Name) { - FoundBSS = BSSSections[i]; - break; - } - } - } else { - std::string Prefix = GV->getNameStr() + "." + Addr + "."; - Name = PAN::getUdataSectionName(BSSSections.size(), Prefix) + " " + Addr; - } - - PIC16Section *NewBSS = FoundBSS; - if (NewBSS == NULL) { - const MCSectionPIC16 *NewSection = - getPIC16Section(Name.c_str(), MCSectionPIC16::UDATA_Kind()); - NewBSS = new PIC16Section(NewSection); - BSSSections.push_back(NewBSS); - } +PIC16TargetObjectFile::allocateAtGivenAddress(const GlobalVariable *GV, + const std::string &Addr) const { + // Determine the type of section that we need to create. + PIC16SectionType SecTy; - // Insert the GV into this BSS. - NewBSS->Items.push_back(GV); - - // We do not want to put any GV without explicit section into this section - // so set its size to DatabankSize. - NewBSS->Size = DataBankSize; - return NewBSS->S_; -} - -// Get rom section for a variable. Currently there can be only one rom section -// unless a variable explicitly requests a section. -const MCSection * -PIC16TargetObjectFile::getROSectionForGlobal(const GlobalVariable *GV) const { - ROSections[0]->Items.push_back(GV); - return ROSections[0]->S_; -} - -// Create initialized data section for a variable. -const MCSection * -PIC16TargetObjectFile::CreateIDATASectionForGlobal(const GlobalVariable *GV, - std::string Addr) const { - assert(GV->hasInitializer() && "This global doesn't need space"); - assert(!GV->getInitializer()->isNullValue() && - "initialized global has zero initializer"); - assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE && - "can be used for initialized RAM data only"); - - std::string Name; - // If address is given then create a section at that address else create a - // section by section name specified in GV. - PIC16Section *FoundIDATASec = NULL; - if (Addr.empty()) { - Name = GV->getSection() + " IDATA"; - for (unsigned i = 0; i < IDATASections.size(); i++) { - if (IDATASections[i]->S_->getName() == Name) { - FoundIDATASec = IDATASections[i]; - break; - } - } - } else { - std::string Prefix = GV->getNameStr() + "." + Addr + "."; - Name = PAN::getIdataSectionName(IDATASections.size(), Prefix) + " " + Addr; - } - - PIC16Section *NewIDATASec = FoundIDATASec; - if (NewIDATASec == NULL) { - const MCSectionPIC16 *NewSection = - getPIC16Section(Name.c_str(), MCSectionPIC16::IDATA_Kind()); - NewIDATASec = new PIC16Section(NewSection); - IDATASections.push_back(NewIDATASec); - } - // Insert the GV into this IDATA Section. - NewIDATASec->Items.push_back(GV); - // We do not want to put any GV without explicit section into this section - // so set its size to DatabankSize. - NewIDATASec->Size = DataBankSize; - return NewIDATASec->S_; + // See if this is an uninitialized global. + const Constant *C = GV->getInitializer(); + if (C->isNullValue()) + SecTy = UDATA; + // If this is initialized data in RAM. Put it in the correct IDATA section. + else if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) + SecTy = IDATA; + // This is initialized data in rom, put it in the readonly section. + else if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) + SecTy = ROMDATA; + else + llvm_unreachable ("Could not determine section type for global"); + + std::string Prefix = GV->getNameStr() + "." + Addr + "."; + std::string SName = PAN::getUserSectionName(Prefix); + PIC16Section *S = getPIC16UserSection(SName.c_str(), SecTy, Addr.c_str()); + S->Items.push_back(GV); + return S; } -// Create a section in rom for a variable. -const MCSection * -PIC16TargetObjectFile::CreateROSectionForGlobal(const GlobalVariable *GV, - std::string Addr) const { - assert(GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE && - "can be used for ROM data only"); - - std::string Name; - // If address is given then create a section at that address else create a - // section by section name specified in GV. - PIC16Section *FoundROSec = NULL; - if (Addr.empty()) { - Name = GV->getSection() + " ROMDATA"; - for (unsigned i = 1; i < ROSections.size(); i++) { - if (ROSections[i]->S_->getName() == Name) { - FoundROSec = ROSections[i]; - break; - } - } - } else { - std::string Prefix = GV->getNameStr() + "." + Addr + "."; - Name = PAN::getRomdataSectionName(ROSections.size(), Prefix) + " " + Addr; - } - - PIC16Section *NewRomSec = FoundROSec; - if (NewRomSec == NULL) { - const MCSectionPIC16 *NewSection = - getPIC16Section(Name.c_str(), MCSectionPIC16::ROMDATA_Kind()); - NewRomSec = new PIC16Section(NewSection); - ROSections.push_back(NewRomSec); - } - - // Insert the GV into this ROM Section. - NewRomSec->Items.push_back(GV); - return NewRomSec->S_; -} diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index 75f6cce..ca07bed 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -10,6 +10,8 @@ #ifndef LLVM_TARGET_PIC16_TARGETOBJECTFILE_H #define LLVM_TARGET_PIC16_TARGETOBJECTFILE_H +#include "PIC16.h" +#include "PIC16ABINames.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/ADT/StringMap.h" #include <vector> @@ -19,7 +21,7 @@ namespace llvm { class GlobalVariable; class Module; class PIC16TargetMachine; - class MCSectionPIC16; + class PIC16Section; enum { DataBankSize = 80 }; @@ -29,91 +31,128 @@ namespace llvm { /// again and printing only those that match the current section. /// Keeping values inside the sections make printing a section much easier. /// - /// FIXME: MOVE ALL THIS STUFF TO MCSectionPIC16. + /// FIXME: MOVE ALL THIS STUFF TO PIC16Section. /// - struct PIC16Section { - const MCSectionPIC16 *S_; // Connection to actual Section. - unsigned Size; // Total size of the objects contained. - bool SectionPrinted; - std::vector<const GlobalVariable*> Items; - - PIC16Section(const MCSectionPIC16 *s) { - S_ = s; - Size = 0; - SectionPrinted = false; - } - bool isPrinted() const { return SectionPrinted; } - void setPrintedStatus(bool status) { SectionPrinted = status; } - }; - + + /// PIC16TargetObjectFile - PIC16 Object file. Contains data and code + /// sections. + // PIC16 Object File has two types of sections. + // 1. Standard Sections + // 1.1 un-initialized global data + // 1.2 initialized global data + // 1.3 program memory data + // 1.4 local variables of functions. + // 2. User defined sections + // 2.1 Objects placed in a specific section. (By _Section() macro) + // 2.2 Objects placed at a specific address. (By _Address() macro) class PIC16TargetObjectFile : public TargetLoweringObjectFile { /// SectionsByName - Bindings of names to allocated sections. - mutable StringMap<MCSectionPIC16*> SectionsByName; + mutable StringMap<PIC16Section*> SectionsByName; const TargetMachine *TM; - const MCSectionPIC16 *getPIC16Section(const char *Name, - SectionKind K, - int Address = -1, - int Color = -1) const; - public: - mutable std::vector<PIC16Section*> BSSSections; - mutable std::vector<PIC16Section*> IDATASections; - mutable std::vector<PIC16Section*> AutosSections; - mutable std::vector<PIC16Section*> ROSections; - mutable PIC16Section *ExternalVarDecls; - mutable PIC16Section *ExternalVarDefs; + /// Lists of sections. + /// Standard Data Sections. + mutable std::vector<PIC16Section *> UDATASections_; + mutable std::vector<PIC16Section *> IDATASections_; + mutable PIC16Section * ROMDATASection_; + + /// Standard Auto Sections. + mutable std::vector<PIC16Section *> AUTOSections_; + + /// User specified sections. + mutable std::vector<PIC16Section *> USERSections_; + + + /// Find or Create a PIC16 Section, without adding it to any + /// section list. + PIC16Section *getPIC16Section(const std::string &Name, + PIC16SectionType Ty, + const std::string &Address = "", + int Color = -1) const; + + /// Convenience functions. These wrappers also take care of adding + /// the newly created section to the appropriate sections list. + + /// Find or Create PIC16 Standard Data Section. + PIC16Section *getPIC16DataSection(const std::string &Name, + PIC16SectionType Ty, + const std::string &Address = "", + int Color = -1) const; + + /// Find or Create PIC16 Standard Auto Section. + PIC16Section *getPIC16AutoSection(const std::string &Name, + PIC16SectionType Ty = UDATA_OVR, + const std::string &Address = "", + int Color = -1) const; + + /// Find or Create PIC16 Standard Auto Section. + PIC16Section *getPIC16UserSection(const std::string &Name, + PIC16SectionType Ty, + const std::string &Address = "", + int Color = -1) const; + + /// Allocate Un-initialized data to a standard UDATA section. + const MCSection *allocateUDATA(const GlobalVariable *GV) const; + + /// Allocate Initialized data to a standard IDATA section. + const MCSection *allocateIDATA(const GlobalVariable *GV) const; + /// Allocate ROM data to the standard ROMDATA section. + const MCSection *allocateROMDATA(const GlobalVariable *GV) const; + + /// Allocate an AUTO variable to an AUTO section. + const MCSection *allocateAUTO(const GlobalVariable *GV) const; + + /// Allocate DATA in user specified section. + const MCSection *allocateInGivenSection(const GlobalVariable *GV) const; + + /// Allocate DATA at user specified address. + const MCSection *allocateAtGivenAddress(const GlobalVariable *GV, + const std::string &Addr) const; + + public: PIC16TargetObjectFile(); ~PIC16TargetObjectFile(); - void Initialize(MCContext &Ctx, const TargetMachine &TM); - + /// Return the section with the given Name. Null if not found. + PIC16Section *findPIC16Section(const std::string &Name); + + /// Override section allocations for user specified sections. virtual const MCSection * getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const; + /// Select sections for Data and Auto variables(globals). virtual const MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine&) const; - const MCSection *getSectionForFunction(const std::string &FnName) const; - const MCSection *getSectionForFunctionFrame(const std::string &FnName)const; - - - private: - std::string getSectionNameForSym(const std::string &Sym) const; - - const MCSection *getBSSSectionForGlobal(const GlobalVariable *GV) const; - const MCSection *getIDATASectionForGlobal(const GlobalVariable *GV) const; - const MCSection *getSectionForAuto(const GlobalVariable *GV) const; - const MCSection *CreateBSSSectionForGlobal(const GlobalVariable *GV, - std::string Addr = "") const; - const MCSection *CreateIDATASectionForGlobal(const GlobalVariable *GV, - std::string Addr = "") const; - const MCSection *getROSectionForGlobal(const GlobalVariable *GV) const; - const MCSection *CreateROSectionForGlobal(const GlobalVariable *GV, - std::string Addr = "") const; - const MCSection *CreateSectionForGlobal(const GlobalVariable *GV, - Mangler *Mang, - const std::string &Addr = "") const; - public: - void SetSectionForGVs(Module &M); - const std::vector<PIC16Section*> &getBSSSections() const { - return BSSSections; + + /// Return a code section for a function. + const PIC16Section *SectionForCode (const std::string &FnName) const; + + /// Return a frame section for a function. + const PIC16Section *SectionForFrame (const std::string &FnName) const; + + /// Accessors for various section lists. + const std::vector<PIC16Section *> &UDATASections() const { + return UDATASections_; } - const std::vector<PIC16Section*> &getIDATASections() const { - return IDATASections; + const std::vector<PIC16Section *> &IDATASections() const { + return IDATASections_; } - const std::vector<PIC16Section*> &getAutosSections() const { - return AutosSections; + const PIC16Section *ROMDATASection() const { + return ROMDATASection_; } - const std::vector<PIC16Section*> &getROSections() const { - return ROSections; + const std::vector<PIC16Section *> &AUTOSections() const { + return AUTOSections_; + } + const std::vector<PIC16Section *> &USERSections() const { + return USERSections_; } - }; } // end namespace llvm diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index a0fba86..dc6b852 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -1129,7 +1129,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { // implementation of multiple entry points). If this doesn't occur, the // linker can safely perform dead code stripping. Since LLVM never generates // code that does this, it is always safe to set. - O << "\t.subsections_via_symbols\n"; + OutStreamer.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols); return AsmPrinter::doFinalization(M); } diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 89ea9d0..2740387 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -1594,23 +1594,8 @@ int int_char(char m) {if(m>7) return 0; return m;} //===---------------------------------------------------------------------===// -Instcombine should replace the load with a constant in: - - static const char x[4] = {'a', 'b', 'c', 'd'}; - - unsigned int y(void) { - return *(unsigned int *)x; - } - -It currently only does this transformation when the size of the constant -is the same as the size of the integer (so, try x[5]) and the last byte -is a null (making it a C string). There's no need for these restrictions. - -//===---------------------------------------------------------------------===// - -InstCombine's "turn load from constant into constant" optimization should be -more aggressive in the presence of bitcasts. For example, because of unions, -this code: +libanalysis is not aggressively folding vector bitcasts. For example, the +constant expressions generated when compiling this code: union vec2d { double e[2]; @@ -1624,23 +1609,29 @@ vec2d foo () { return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v }; } -Compiles into: +in X86-32 end up being: -@a = internal constant %0 { [2 x double] - [double 1.000000e+00, double 2.000000e+00] }, align 16 -@b = internal constant %0 { [2 x double] - [double 3.000000e+00, double 4.000000e+00] }, align 16 -... -define void @foo(%struct.vec2d* noalias nocapture sret %agg.result) nounwind { +define void @foo(%union.vec2d* noalias nocapture sret %agg.result) nounwind ssp { entry: - %0 = load <2 x double>* getelementptr (%struct.vec2d* - bitcast (%0* @a to %struct.vec2d*), i32 0, i32 0), align 16 - %1 = load <2 x double>* getelementptr (%struct.vec2d* - bitcast (%0* @b to %struct.vec2d*), i32 0, i32 0), align 16 + %agg.result.0 = getelementptr %union.vec2d* %agg.result, i32 0, i32 0 ; <<2 x double>*> [#uses=1] + store <2 x double> fadd (<2 x double> bitcast (<1 x i128> <i128 85070591730234615870450834276742070272> to <2 x double>), <2 x double> fmul (<2 x double> bitcast (<1 x i128> <i128 85153668479971173112514077617450647552> to <2 x double>), <2 x double> <double 5.000000e+00, double 5.000000e+00>)), <2 x double>* %agg.result.0, align 16 + ret void +} +and in X86-64 mode: -Instcombine should be able to optimize away the loads (and thus the globals). +define %0 @foo() nounwind readnone ssp { +entry: + %mrv5 = insertvalue %0 undef, double extractelement (<2 x double> fadd (<2 x double> bitcast (<1 x i128> <i128 85070591730234615870450834276742070272> to <2 x double>), <2 x double> fmul (<2 x double> bitcast (<1 x i128> <i128 85153668479971173112514077617450647552> to <2 x double>), <2 x double> bitcast (<1 x i128> <i128 85174437667405312423031577302488055808> to <2 x double>))), i32 0), 0 ; <%0> [#uses=1] + %mrv6 = insertvalue %0 %mrv5, double extractelement (<2 x double> fadd (<2 x double> bitcast (<1 x i128> <i128 85070591730234615870450834276742070272> to <2 x double>), <2 x double> fmul (<2 x double> bitcast (<1 x i128> <i128 85153668479971173112514077617450647552> to <2 x double>), <2 x double> bitcast (<1 x i128> <i128 85174437667405312423031577302488055808> to <2 x double>))), i32 1), 1 ; <%0> [#uses=1] + ret %0 %mrv6 +} -See also PR4973 +//===---------------------------------------------------------------------===// + +IPSCCP is propagating elements of first class aggregates, but is not propagating +the entire aggregate itself. This leads it to miss opportunities, for example +in test/Transforms/SCCP/ipsccp-basic.ll:test5b. //===---------------------------------------------------------------------===// + diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp index d8da08e..e049a1d 100644 --- a/lib/Target/TargetIntrinsicInfo.cpp +++ b/lib/Target/TargetIntrinsicInfo.cpp @@ -12,11 +12,19 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Function.h" +#include "llvm/ADT/StringMap.h" using namespace llvm; -TargetIntrinsicInfo::TargetIntrinsicInfo(const char **desc, unsigned count) - : Intrinsics(desc), NumIntrinsics(count) { +TargetIntrinsicInfo::TargetIntrinsicInfo() { } TargetIntrinsicInfo::~TargetIntrinsicInfo() { } + +unsigned TargetIntrinsicInfo::getIntrinsicID(Function *F) const { + const ValueName *ValName = F->getValueName(); + if (!ValName) + return 0; + return lookupName(ValName->getKeyData(), ValName->getKeyLength()); +} diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index bc70ffe..8ec5b62 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -57,9 +57,7 @@ void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { } } -void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - const char *Modifier) { - assert(Modifier == 0 && "Modifiers should not be used"); +void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 5f28fa4..3180618 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -32,8 +32,7 @@ public: static const char *getRegisterName(unsigned RegNo); - void printOperand(const MCInst *MI, unsigned OpNo, - const char *Modifier = 0); + void printOperand(const MCInst *MI, unsigned OpNo); void printMemReference(const MCInst *MI, unsigned Op); void printLeaMemReference(const MCInst *MI, unsigned Op); void printSSECC(const MCInst *MI, unsigned Op); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 2a0290d..ae8e6d3 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -225,7 +225,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { std::string Name = Mang->getMangledName(GV, Suffix, Suffix[0] != '\0'); if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = + X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); COFFMMI.DecorateCygMingName(Name, GV, *TM.getTargetData()); } @@ -288,12 +288,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { std::string Name = Mang->makeNameProper(MO.getSymbolName()); if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { Name += "$stub"; - MCSymbol *Sym = OutContext.GetOrCreateSymbol(Name); + MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name)); const MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { Name.erase(Name.end()-5, Name.end()); - StubSym = OutContext.GetOrCreateSymbol(Name); + StubSym = OutContext.GetOrCreateSymbol(StringRef(Name)); } } @@ -870,49 +870,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // implementation of multiple entry points). If this doesn't occur, the // linker can safely perform dead code stripping. Since LLVM never // generates code that does this, it is always safe to set. - O << "\t.subsections_via_symbols\n"; - } - - if (Subtarget->isTargetCOFF()) { - // Necessary for dllexport support - std::vector<std::string> DLLExportedFns, DLLExportedGlobals; + OutStreamer.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols); + } - X86COFFMachineModuleInfo &COFFMMI = + if (Subtarget->isTargetCOFF()) { + X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering()); - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedFns.push_back(Mang->getMangledName(I)); - - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedGlobals.push_back(Mang->getMangledName(I)); - - if (Subtarget->isTargetCygMing()) { - // Emit type information for external functions - for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), + // Emit type information for external functions + for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), E = COFFMMI.stub_end(); I != E; ++I) { - O << "\t.def\t " << I->getKeyData() + O << "\t.def\t " << I->getKeyData() << ";\t.scl\t" << COFF::C_EXT << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) << ";\t.endef\n"; - } } - - // Output linker support code for dllexported globals on windows. - if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { - OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve", - true, + + if (Subtarget->isTargetCygMing()) { + // Necessary for dllexport support + std::vector<std::string> DLLExportedFns, DLLExportedGlobals; + + TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasDLLExportLinkage()) { + std::string Name = Mang->getMangledName(I); + COFFMMI.DecorateCygMingName(Name, I, *TM.getTargetData()); + DLLExportedFns.push_back(Name); + } + + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) + if (I->hasDLLExportLinkage()) { + std::string Name = Mang->getMangledName(I); + COFFMMI.DecorateCygMingName(Name, I, *TM.getTargetData()); + DLLExportedGlobals.push_back(Mang->getMangledName(I)); + } + + // Output linker support code for dllexported globals on windows. + if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { + OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve", + true, SectionKind::getMetadata())); - - for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) - O << "\t.ascii \" -export:" << DLLExportedGlobals[i] << ",data\"\n"; - - for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) - O << "\t.ascii \" -export:" << DLLExportedFns[i] << "\"\n"; + for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) + O << "\t.ascii \" -export:" << DLLExportedGlobals[i] << ",data\"\n"; + + for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) + O << "\t.ascii \" -export:" << DLLExportedFns[i] << "\"\n"; + } } } } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 5ccddf5..d498c57 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -38,10 +38,8 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - SmallString<60> Name; - raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() - << AsmPrinter.getFunctionNumber() << "$pb"; - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Twine(AsmPrinter.MAI->getPrivateGlobalPrefix())+ + Twine(AsmPrinter.getFunctionNumber())+"$pb"); } @@ -308,6 +306,8 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MI->dump(); llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; MCOp = MCOperand::CreateReg(MO.getReg()); break; case MachineOperand::MO_Immediate: @@ -449,10 +449,9 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. We know that this operand flag occurs at most once per function. - SmallString<64> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() - << "picbaseref" << getFunctionNumber(); - MCSymbol *DotSym = OutContext.GetOrCreateSymbol(Name.str()); + const char *Prefix = MAI->getPrivateGlobalPrefix(); + MCSymbol *DotSym = OutContext.GetOrCreateSymbol(Twine(Prefix)+"picbaseref"+ + Twine(getFunctionNumber())); OutStreamer.EmitLabel(DotSym); // Now that we have emitted the label, lower the complex operand expression. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fadc818..e5e7bc8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2286,6 +2286,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, case ISD::SETNE: return X86::COND_NE; case ISD::SETUO: return X86::COND_P; case ISD::SETO: return X86::COND_NP; + case ISD::SETOEQ: + case ISD::SETUNE: return X86::COND_INVALID; } } @@ -2389,6 +2391,56 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { return ::isPSHUFLWMask(M, N->getValueType(0)); } +/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that +/// is suitable for input to PALIGNR. +static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool hasSSSE3) { + int i, e = VT.getVectorNumElements(); + + // Do not handle v2i64 / v2f64 shuffles with palignr. + if (e < 4 || !hasSSSE3) + return false; + + for (i = 0; i != e; ++i) + if (Mask[i] >= 0) + break; + + // All undef, not a palignr. + if (i == e) + return false; + + // Determine if it's ok to perform a palignr with only the LHS, since we + // don't have access to the actual shuffle elements to see if RHS is undef. + bool Unary = Mask[i] < (int)e; + bool NeedsUnary = false; + + int s = Mask[i] - i; + + // Check the rest of the elements to see if they are consecutive. + for (++i; i != e; ++i) { + int m = Mask[i]; + if (m < 0) + continue; + + Unary = Unary && (m < (int)e); + NeedsUnary = NeedsUnary || (m < s); + + if (NeedsUnary && !Unary) + return false; + if (Unary && m != ((s+i) & (e-1))) + return false; + if (!Unary && m != (s+i)) + return false; + } + return true; +} + +bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) { + SmallVector<int, 8> M; + N->getMask(M); + return ::isPALIGNRMask(M, N->getValueType(0), true); +} + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to SHUFP*. static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { @@ -2733,8 +2785,7 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { } /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle -/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* -/// instructions. +/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. unsigned X86::getShuffleSHUFImmediate(SDNode *N) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); int NumOperands = SVOp->getValueType(0).getVectorNumElements(); @@ -2753,8 +2804,7 @@ unsigned X86::getShuffleSHUFImmediate(SDNode *N) { } /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle -/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW -/// instructions. +/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); unsigned Mask = 0; @@ -2770,8 +2820,7 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { } /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle -/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW -/// instructions. +/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); unsigned Mask = 0; @@ -2786,6 +2835,23 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { return Mask; } +/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. +unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + EVT VVT = N->getValueType(0); + unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3; + int Val = 0; + + unsigned i, e; + for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) { + Val = SVOp->getMaskElt(i); + if (Val >= 0) + break; + } + return (Val - i) * EltSize; +} + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { @@ -5502,6 +5568,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); + if (X86CC == X86::COND_INVALID) + return SDValue(); SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, @@ -5650,8 +5718,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue CC; - if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerSETCC(Cond, DAG); + if (Cond.getOpcode() == ISD::SETCC) { + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; + } // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -5724,8 +5795,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue CC; - if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerSETCC(Cond, DAG); + if (Cond.getOpcode() == ISD::SETCC) { + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; + } #if 0 // FIXME: LowerXALUO doesn't handle these!! else if (Cond.getOpcode() == X86ISD::ADD || @@ -6274,6 +6348,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG); + assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!"); SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond); @@ -7274,7 +7349,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, if (VT.getSizeInBits() == 64) return false; - // FIXME: pshufb, blends, palignr, shifts. + // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isMOVLMask(M, VT) || @@ -7282,6 +7357,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isPSHUFDMask(M, VT) || isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) || isUNPCKLMask(M, VT) || isUNPCKHMask(M, VT) || isUNPCKL_v_undef_Mask(M, VT) || diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2f7b8ba..66a9107 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -323,21 +323,27 @@ namespace llvm { /// specifies a shuffle of elements that is suitable for input to MOVDDUP. bool isMOVDDUPMask(ShuffleVectorSDNode *N); + /// isPALIGNRMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to PALIGNR. + bool isPALIGNRMask(ShuffleVectorSDNode *N); + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. unsigned getShuffleSHUFImmediate(SDNode *N); /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle - /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW - /// instructions. + /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction. unsigned getShufflePSHUFHWImmediate(SDNode *N); - /// getShufflePSHUFKWImmediate - Return the appropriate immediate to shuffle - /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW - /// instructions. + /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle + /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction. unsigned getShufflePSHUFLWImmediate(SDNode *N); + /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle + /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. + unsigned getShufflePALIGNRImmediate(SDNode *N); + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool isZeroNode(SDValue Elt); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index ef19823..c1b7b8f 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -368,19 +368,15 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zext GR8:$src))]>, TB; + "", [(set GR64:$dst, (zext GR8:$src))]>, TB; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), - "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; + "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zext GR16:$src))]>, TB; + "", [(set GR64:$dst, (zext GR16:$src))]>, TB; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; + "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero @@ -390,11 +386,9 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zext GR32:$src))]>; + "", [(set GR64:$dst, (zext GR32:$src))]>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), - "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; + "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; // Any instruction that defines a 32-bit result leaves the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may @@ -1455,8 +1449,7 @@ def : Pat<(i64 0), // Materialize i64 constant where top 32-bits are zero. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), - "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR64:$dst, i64immZExt32:$src)]>; + "", [(set GR64:$dst, i64immZExt32:$src)]>; //===----------------------------------------------------------------------===// // Thread Local Storage Instructions @@ -1515,6 +1508,7 @@ def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val) } // Optimized codegen when the non-memory output is not used. +let Defs = [EFLAGS] in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" @@ -1544,7 +1538,7 @@ def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "lock\n\t" "dec{q}\t$dst", []>, LOCK; - +} // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index abdb313..2f14bb0 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -224,10 +224,10 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>; + : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>; + : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: // diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 30b57d8..16b2af7 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -3392,11 +3392,9 @@ def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), // of the register here. This has a smaller encoding and avoids a // partial-register update. def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), - "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR16:$dst, (sext GR8:$src))]>, TB; + "", [(set GR16:$dst, (sext GR8:$src))]>, TB; def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), - "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB; + "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB; def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR8:$src))]>, TB; @@ -3414,11 +3412,9 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), // of the register here. This has a smaller encoding and avoids a // partial-register update. def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), - "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR16:$dst, (zext GR8:$src))]>, TB; + "", [(set GR16:$dst, (zext GR8:$src))]>, TB; def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), - "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}", - [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; + "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zext GR8:$src))]>, TB; @@ -3474,9 +3470,8 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), [(set GR8:$dst, 0)]>; // Use xorl instead of xorw since we don't care about the high 16 bits, // it's smaller, and it avoids a partial-register update. -def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), - "xor{l}\t${dst:subreg32}, ${dst:subreg32}", - [(set GR16:$dst, 0)]>; +def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), + "", [(set GR16:$dst, 0)]>; def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "xor{l}\t$dst, $dst", [(set GR32:$dst, 0)]>; @@ -3598,6 +3593,7 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), // Optimized codegen when the non-memory output is not used. // FIXME: Use normal add / sub instructions and add lock prefix dynamically. +let Defs = [EFLAGS] in { def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "lock\n\t" "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; @@ -3667,6 +3663,7 @@ def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "lock\n\t" "dec{l}\t$dst", []>, LOCK; +} // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 96fc932..f4e97c9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -197,6 +197,12 @@ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); }]>; +// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to +// a PALIGNR imm. +def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShufflePALIGNRImmediate(N)); +}]>; + def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); @@ -283,6 +289,11 @@ def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); }], SHUFFLE_get_pshuflw_imm>; +def palign : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_palign_imm>; + //===----------------------------------------------------------------------===// // SSE scalar FP Instructions //===----------------------------------------------------------------------===// @@ -2062,6 +2073,7 @@ defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>; defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; // Shuffle and unpack instructions +let AddedComplexity = 5 in { def PSHUFDri : PDIi8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -2073,6 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, [(set VR128:$dst, (v4i32 (pshufd:$src2 (bc_v4i32(memopv2i64 addr:$src1)), (undef))))]>; +} // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, @@ -2839,6 +2852,26 @@ let Constraints = "$src1 = $dst" in { imm:$src3))]>, OpSize; } +// palignr patterns. +let AddedComplexity = 5 in { +def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), + (PALIGNR128rr VR128:$src2, VR128:$src1, + (SHUFFLE_get_palign_imm VR128:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v4f32 (palign:$src3 VR128:$src1, VR128:$src2)), + (PALIGNR128rr VR128:$src2, VR128:$src1, + (SHUFFLE_get_palign_imm VR128:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v8i16 (palign:$src3 VR128:$src1, VR128:$src2)), + (PALIGNR128rr VR128:$src2, VR128:$src1, + (SHUFFLE_get_palign_imm VR128:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), + (PALIGNR128rr VR128:$src2, VR128:$src1, + (SHUFFLE_get_palign_imm VR128:$src3))>, + Requires<[HasSSSE3]>; +} + def : Pat<(X86pshufb VR128:$src, VR128:$mask), (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index fb76aeb..9525f04 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -382,7 +382,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasFMA4(false) , IsBTMemSlow(false) , DarwinVers(0) - , IsLinux(false) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) @@ -434,7 +433,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, } else if (TT.find("linux") != std::string::npos) { // Linux doesn't imply ELF, but we don't currently support anything else. TargetType = isELF; - IsLinux = true; } else if (TT.find("cygwin") != std::string::npos) { TargetType = isCygwin; } else if (TT.find("mingw") != std::string::npos) { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index a2e368d..e64b854 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -82,9 +82,6 @@ protected: /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform. - /// isLinux - true if this is a "linux" platform. - bool IsLinux; - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -195,11 +192,7 @@ public: /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, /// 10 = Snow Leopard, etc. unsigned getDarwinVers() const { return DarwinVers; } - - /// isLinux - Return true if the target is "Linux". - bool isLinux() const { return IsLinux; } - - + /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. @@ -222,6 +215,14 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; + + /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling + /// at 'More' optimization level. + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& mode) const { + mode = TargetSubtarget::ANTIDEP_CRITICAL; + return OptLevel >= CodeGenOpt::Default; + } }; } // End llvm namespace diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index 8000d0d..91534a7 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -30,9 +30,8 @@ namespace { virtual bool runOnFunction(Function &F) { HelloCounter++; - std::string fname = F.getName(); - EscapeString(fname); - errs() << "Hello: " << fname << "\n"; + errs() << "Hello: "; + errs().write_escaped(F.getName()) << '\n'; return false; } }; @@ -49,9 +48,8 @@ namespace { virtual bool runOnFunction(Function &F) { HelloCounter++; - std::string fname = F.getName(); - EscapeString(fname); - errs() << "Hello: " << fname << "\n"; + errs() << "Hello: "; + errs().write_escaped(F.getName()) << '\n'; return false; } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index ec0f1e1..5c28801 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -9,7 +9,6 @@ add_llvm_library(LLVMipo GlobalOpt.cpp IPConstantPropagation.cpp IPO.cpp - IndMemRemoval.cpp InlineAlways.cpp InlineSimple.cpp Inliner.cpp diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 7edaa7f..0701b94 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -153,7 +153,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { // Writes memory. Just give up. return false; - if (isa<MallocInst>(I)) + if (isMalloc(I)) // malloc claims not to write memory! PR3754. return false; @@ -267,11 +267,8 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, // Check whether the pointer came from an allocation. case Instruction::Alloca: - case Instruction::Malloc: break; case Instruction::Call: - if (isMalloc(RVI)) - break; case Instruction::Invoke: { CallSite CS(RVI); if (CS.paramHasAttr(0, Attribute::NoAlias)) diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 09f9e7c..8f4e8b3 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -149,8 +149,6 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); - // Remove dead metadata. - Changed |= M.getContext().RemoveDeadMetadata(); return Changed; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index a44386e..9ced2e8 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -822,140 +822,18 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { /// malloc, there is no reason to actually DO the malloc. Instead, turn the /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, - MallocInst *MI, - LLVMContext &Context) { - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI); - ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize()); - - if (NElements->getZExtValue() != 1) { - // If we have an array allocation, transform it to a single element - // allocation to make the code below simpler. - Type *NewTy = ArrayType::get(MI->getAllocatedType(), - NElements->getZExtValue()); - MallocInst *NewMI = - new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)), - MI->getAlignment(), MI->getName(), MI); - Value* Indices[2]; - Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context)); - Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, - NewMI->getName()+".el0", MI); - MI->replaceAllUsesWith(NewGEP); - MI->eraseFromParent(); - MI = NewMI; - } - - // Create the new global variable. The contents of the malloc'd memory is - // undefined, so initialize with an undef value. - // FIXME: This new global should have the alignment returned by malloc. Code - // could depend on malloc returning large alignment (on the mac, 16 bytes) but - // this would only guarantee some lower alignment. - Constant *Init = UndefValue::get(MI->getAllocatedType()); - GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - MI->getAllocatedType(), false, - GlobalValue::InternalLinkage, Init, - GV->getName()+".body", - GV, - GV->isThreadLocal()); - - // Anything that used the malloc now uses the global directly. - MI->replaceAllUsesWith(NewGV); - - Constant *RepValue = NewGV; - if (NewGV->getType() != GV->getType()->getElementType()) - RepValue = ConstantExpr::getBitCast(RepValue, - GV->getType()->getElementType()); - - // If there is a comparison against null, we will insert a global bool to - // keep track of whether the global was initialized yet or not. - GlobalVariable *InitBool = - new GlobalVariable(Context, Type::getInt1Ty(Context), false, - GlobalValue::InternalLinkage, - ConstantInt::getFalse(Context), GV->getName()+".init", - GV->isThreadLocal()); - bool InitBoolUsed = false; - - // Loop over all uses of GV, processing them in turn. - std::vector<StoreInst*> Stores; - while (!GV->use_empty()) - if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { - while (!LI->use_empty()) { - Use &LoadUse = LI->use_begin().getUse(); - if (!isa<ICmpInst>(LoadUse.getUser())) - LoadUse = RepValue; - else { - ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser()); - // Replace the cmp X, 0 with a use of the bool value. - Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); - InitBoolUsed = true; - switch (CI->getPredicate()) { - default: llvm_unreachable("Unknown ICmp Predicate!"); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - LV = ConstantInt::getFalse(Context); // X < null -> always false - break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_EQ: - LV = BinaryOperator::CreateNot(LV, "notinit", CI); - break; - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - break; // no change. - } - CI->replaceAllUsesWith(LV); - CI->eraseFromParent(); - } - } - LI->eraseFromParent(); - } else { - StoreInst *SI = cast<StoreInst>(GV->use_back()); - // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); - SI->eraseFromParent(); - } - - // If the initialization boolean was used, insert it, otherwise delete it. - if (!InitBoolUsed) { - while (!InitBool->use_empty()) // Delete initializations - cast<Instruction>(InitBool->use_back())->eraseFromParent(); - delete InitBool; - } else - GV->getParent()->getGlobalList().insert(GV, InitBool); - - - // Now the GV is dead, nuke it and the malloc. - GV->eraseFromParent(); - MI->eraseFromParent(); - - // To further other optimizations, loop over all users of NewGV and try to - // constant prop them. This will promote GEP instructions with constant - // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV, Context); - if (RepValue != NewGV) - ConstantPropUsersOf(RepValue, Context); - - return NewGV; -} - -/// OptimizeGlobalAddressOfMalloc - This function takes the specified global -/// variable, and transforms the program as if it always contained the result of -/// the specified malloc. Because it is always the result of the specified -/// malloc, there is no reason to actually DO the malloc. Instead, turn the -/// malloc into a global, and any loads of GV as uses of the new global. -static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, BitCastInst *BCI, LLVMContext &Context, TargetData* TD) { + DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV + << " CALL = " << *CI << " BCI = " << *BCI << '\n'); + const Type *IntPtrTy = TD->getIntPtrType(Context); - DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *CI); - - ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI, - Context, TD)); + Value* ArraySize = getMallocArraySize(CI, Context, TD); + assert(ArraySize && "not a malloc whose array size can be determined"); + ConstantInt *NElements = cast<ConstantInt>(ArraySize); if (NElements->getZExtValue() != 1) { // If we have an array allocation, transform it to a single element // allocation to make the code below simpler. @@ -976,9 +854,6 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - // FIXME: This new global should have the alignment returned by malloc. Code - // could depend on malloc returning large alignment (on the mac, 16 bytes) but - // this would only guarantee some lower alignment. const Type *MAT = getMallocAllocatedType(CI); Constant *Init = UndefValue::get(MAT); GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), @@ -1398,185 +1273,6 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, } } -/// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break -/// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI, - LLVMContext &Context){ - DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI); - const StructType *STy = cast<StructType>(MI->getAllocatedType()); - - // There is guaranteed to be at least one use of the malloc (storing - // it into GV). If there are other uses, change them to be uses of - // the global to simplify later code. This also deletes the store - // into GV. - ReplaceUsesOfMallocWithGlobal(MI, GV); - - // Okay, at this point, there are no users of the malloc. Insert N - // new mallocs at the same place as MI, and N globals. - std::vector<Value*> FieldGlobals; - std::vector<MallocInst*> FieldMallocs; - - for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ - const Type *FieldTy = STy->getElementType(FieldNo); - const Type *PFieldTy = PointerType::getUnqual(FieldTy); - - GlobalVariable *NGV = - new GlobalVariable(*GV->getParent(), - PFieldTy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(PFieldTy), - GV->getName() + ".f" + Twine(FieldNo), GV, - GV->isThreadLocal()); - FieldGlobals.push_back(NGV); - - MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), - MI->getName() + ".f" + Twine(FieldNo), MI); - FieldMallocs.push_back(NMI); - new StoreInst(NMI, NGV, MI); - } - - // The tricky aspect of this transformation is handling the case when malloc - // fails. In the original code, malloc failing would set the result pointer - // of malloc to null. In this case, some mallocs could succeed and others - // could fail. As such, we emit code that looks like this: - // F0 = malloc(field0) - // F1 = malloc(field1) - // F2 = malloc(field2) - // if (F0 == 0 || F1 == 0 || F2 == 0) { - // if (F0) { free(F0); F0 = 0; } - // if (F1) { free(F1); F1 = 0; } - // if (F2) { free(F2); F2 = 0; } - // } - Value *RunningOr = 0; - for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull"); - if (!RunningOr) - RunningOr = Cond; // First seteq - else - RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI); - } - - // Split the basic block at the old malloc. - BasicBlock *OrigBB = MI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont"); - - // Create the block to check the first condition. Put all these blocks at the - // end of the function as they are unlikely to be executed. - BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", - OrigBB->getParent()); - - // Remove the uncond branch from OrigBB to ContBB, turning it into a cond - // branch on RunningOr. - OrigBB->getTerminator()->eraseFromParent(); - BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); - - // Within the NullPtrBlock, we need to emit a comparison and branch for each - // pointer, because some may be null while others are not. - for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, - Constant::getNullValue(GVVal->getType()), - "tmp"); - BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", - OrigBB->getParent()); - BasicBlock *NextBlock = BasicBlock::Create(Context, "next", - OrigBB->getParent()); - BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); - - // Fill in FreeBlock. - new FreeInst(GVVal, FreeBlock); - new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], - FreeBlock); - BranchInst::Create(NextBlock, FreeBlock); - - NullPtrBlock = NextBlock; - } - - BranchInst::Create(ContBB, NullPtrBlock); - - // MI is no longer needed, remove it. - MI->eraseFromParent(); - - /// InsertedScalarizedLoads - As we process loads, if we can't immediately - /// update all uses of the load, keep track of what scalarized loads are - /// inserted for a given load. - DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; - InsertedScalarizedValues[GV] = FieldGlobals; - - std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; - - // Okay, the malloc site is completely handled. All of the uses of GV are now - // loads, and all uses of those loads are simple. Rewrite them to use loads - // of the per-field globals instead. - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { - Instruction *User = cast<Instruction>(*UI++); - - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, - Context); - continue; - } - - // Must be a store of null. - StoreInst *SI = cast<StoreInst>(User); - assert(isa<ConstantPointerNull>(SI->getOperand(0)) && - "Unexpected heap-sra user!"); - - // Insert a store of null into each global. - for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { - const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); - Constant *Null = Constant::getNullValue(PT->getElementType()); - new StoreInst(Null, FieldGlobals[i], SI); - } - // Erase the original store. - SI->eraseFromParent(); - } - - // While we have PHIs that are interesting to rewrite, do it. - while (!PHIsToRewrite.empty()) { - PHINode *PN = PHIsToRewrite.back().first; - unsigned FieldNo = PHIsToRewrite.back().second; - PHIsToRewrite.pop_back(); - PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); - assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); - - // Add all the incoming values. This can materialize more phis. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *InVal = PN->getIncomingValue(i); - InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, - PHIsToRewrite, Context); - FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); - } - } - - // Drop all inter-phi links and any loads that made it this far. - for (DenseMap<Value*, std::vector<Value*> >::iterator - I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); - I != E; ++I) { - if (PHINode *PN = dyn_cast<PHINode>(I->first)) - PN->dropAllReferences(); - else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) - LI->dropAllReferences(); - } - - // Delete all the phis and loads now that inter-references are dead. - for (DenseMap<Value*, std::vector<Value*> >::iterator - I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); - I != E; ++I) { - if (PHINode *PN = dyn_cast<PHINode>(I->first)) - PN->eraseFromParent(); - else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) - LI->eraseFromParent(); - } - - // The old global is now dead, remove it. - GV->eraseFromParent(); - - ++NumHeapSRA; - return cast<GlobalVariable>(FieldGlobals[0]); -} - /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, @@ -1587,6 +1283,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, << " BITCAST = " << *BCI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast<StructType>(MAT); + Value* ArraySize = getMallocArraySize(CI, Context, TD); + assert(ArraySize && "not a malloc whose array size can be determined"); // There is guaranteed to be at least one use of the malloc (storing // it into GV). If there are other uses, change them to be uses of @@ -1611,8 +1309,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); - Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy, - getMallocArraySize(CI, Context, TD), + Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), + FieldTy, ArraySize, BCI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, BCI); @@ -1766,95 +1464,6 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, /// pointer global variable with a single value stored it that is a malloc or /// cast of malloc. static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, - MallocInst *MI, - Module::global_iterator &GVI, - TargetData *TD, - LLVMContext &Context) { - // If this is a malloc of an abstract type, don't touch it. - if (!MI->getAllocatedType()->isSized()) - return false; - - // We can't optimize this global unless all uses of it are *known* to be - // of the malloc value, not of the null initializer value (consider a use - // that compares the global's value against zero to see if the malloc has - // been reached). To do this, we check to see if all uses of the global - // would trap if the global were null: this proves that they must all - // happen after the malloc. - if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) - return false; - - // We can't optimize this if the malloc itself is used in a complex way, - // for example, being stored into multiple globals. This allows the - // malloc to be stored into the specified global, loaded setcc'd, and - // GEP'd. These are all things we could transform to using the global - // for. - { - SmallPtrSet<PHINode*, 8> PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs)) - return false; - } - - - // If we have a global that is only initialized with a fixed size malloc, - // transform the program to use global memory instead of malloc'd memory. - // This eliminates dynamic allocation, avoids an indirection accessing the - // data, and exposes the resultant global to further GlobalOpt. - if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) { - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue()* - TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context); - return true; - } - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - const Type *AllocTy = MI->getAllocatedType(); - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (!MI->isArrayAllocation()) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) - AllocTy = AT->getElementType(); - - if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) { - MallocInst *NewMI = - new MallocInst(AllocSTy, - ConstantInt::get(Type::getInt32Ty(Context), - AT->getNumElements()), - "", MI); - NewMI->takeName(MI); - Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI); - MI->replaceAllUsesWith(Cast); - MI->eraseFromParent(); - MI = NewMI; - } - - GVI = PerformHeapAllocSRoA(GV, MI, Context); - return true; - } - } - - return false; -} - -/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a -/// pointer global variable with a single value stored it that is a malloc or -/// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, BitCastInst *BCI, Module::global_iterator &GVI, @@ -1892,52 +1501,55 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. - if (ConstantInt *NElements = - dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) { - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); - return true; - } - } - - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. - - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (!isArrayMalloc(CI, Context, TD)) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) - AllocTy = AT->getElementType(); - - if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), - AT->getNumElements()); - Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), - AllocSTy, NumElements, - BCI->getName()); - Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); - BCI->replaceAllUsesWith(Cast); - BCI->eraseFromParent(); - CI->eraseFromParent(); - BCI = cast<BitCastInst>(NewMI); - CI = extractMallocCallFromBitCast(NewMI); + Value *NElems = getMallocArraySize(CI, Context, TD); + // We cannot optimize the malloc if we cannot determine malloc array size. + if (NElems) { + if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (TD && + NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); + return true; } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (!isArrayMalloc(CI, Context, TD)) + if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) + AllocTy = AT->getElementType(); + + if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (const ArrayType *AT = + dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), + AT->getNumElements()); + Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), + AllocSTy, NumElements, + BCI->getName()); + Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); + BCI->replaceAllUsesWith(Cast); + BCI->eraseFromParent(); + CI->eraseFromParent(); + BCI = cast<BitCastInst>(NewMI); + CI = extractMallocCallFromBitCast(NewMI); + } - GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); - return true; + GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); + return true; + } } } @@ -1966,9 +1578,6 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // Optimize away any trapping uses of the loaded value. if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context)) return true; - } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) { - if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context)) - return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { if (getMallocAllocatedType(CI)) { BitCastInst* BCI = NULL; diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index 4c1f26d..deb4405 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -1,4 +1,4 @@ -//===- RaiseAllocations.cpp - Convert @malloc & @free calls to insts ------===// +//===- RaiseAllocations.cpp - Convert @free calls to insts ------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines the RaiseAllocations pass which convert malloc and free -// calls to malloc and free instructions. +// This file defines the RaiseAllocations pass which convert free calls to free +// instructions. // //===----------------------------------------------------------------------===// @@ -29,19 +29,19 @@ using namespace llvm; STATISTIC(NumRaised, "Number of allocations raised"); namespace { - // RaiseAllocations - Turn @malloc and @free calls into the appropriate + // RaiseAllocations - Turn @free calls into the appropriate // instruction. // class VISIBILITY_HIDDEN RaiseAllocations : public ModulePass { - Function *MallocFunc; // Functions in the module we are processing - Function *FreeFunc; // Initialized by doPassInitializationVirt + Function *FreeFunc; // Functions in the module we are processing + // Initialized by doPassInitializationVirt public: static char ID; // Pass identification, replacement for typeid RaiseAllocations() - : ModulePass(&ID), MallocFunc(0), FreeFunc(0) {} + : ModulePass(&ID), FreeFunc(0) {} // doPassInitialization - For the raise allocations pass, this finds a - // declaration for malloc and free if they exist. + // declaration for free if it exists. // void doInitialization(Module &M); @@ -61,50 +61,16 @@ ModulePass *llvm::createRaiseAllocationsPass() { } -// If the module has a symbol table, they might be referring to the malloc and -// free functions. If this is the case, grab the method pointers that the -// module is using. +// If the module has a symbol table, they might be referring to the free +// function. If this is the case, grab the method pointers that the module is +// using. // -// Lookup @malloc and @free in the symbol table, for later use. If they don't +// Lookup @free in the symbol table, for later use. If they don't // exist, or are not external, we do not worry about converting calls to that // function into the appropriate instruction. // void RaiseAllocations::doInitialization(Module &M) { - // Get Malloc and free prototypes if they exist! - MallocFunc = M.getFunction("malloc"); - if (MallocFunc) { - const FunctionType* TyWeHave = MallocFunc->getFunctionType(); - - // Get the expected prototype for malloc - const FunctionType *Malloc1Type = - FunctionType::get(Type::getInt8PtrTy(M.getContext()), - std::vector<const Type*>(1, - Type::getInt64Ty(M.getContext())), false); - - // Chck to see if we got the expected malloc - if (TyWeHave != Malloc1Type) { - // Check to see if the prototype is wrong, giving us i8*(i32) * malloc - // This handles the common declaration of: 'void *malloc(unsigned);' - const FunctionType *Malloc2Type = - FunctionType::get(PointerType::getUnqual( - Type::getInt8Ty(M.getContext())), - std::vector<const Type*>(1, - Type::getInt32Ty(M.getContext())), false); - if (TyWeHave != Malloc2Type) { - // Check to see if the prototype is missing, giving us - // i8*(...) * malloc - // This handles the common declaration of: 'void *malloc();' - const FunctionType *Malloc3Type = - FunctionType::get(PointerType::getUnqual( - Type::getInt8Ty(M.getContext())), - true); - if (TyWeHave != Malloc3Type) - // Give up - MallocFunc = 0; - } - } - } - + // Get free prototype if it exists! FreeFunc = M.getFunction("free"); if (FreeFunc) { const FunctionType* TyWeHave = FreeFunc->getFunctionType(); @@ -138,72 +104,18 @@ void RaiseAllocations::doInitialization(Module &M) { } // Don't mess with locally defined versions of these functions... - if (MallocFunc && !MallocFunc->isDeclaration()) MallocFunc = 0; if (FreeFunc && !FreeFunc->isDeclaration()) FreeFunc = 0; } // run - Transform calls into instructions... // bool RaiseAllocations::runOnModule(Module &M) { - // Find the malloc/free prototypes... + // Find the free prototype... doInitialization(M); bool Changed = false; - // First, process all of the malloc calls... - if (MallocFunc) { - std::vector<User*> Users(MallocFunc->use_begin(), MallocFunc->use_end()); - std::vector<Value*> EqPointers; // Values equal to MallocFunc - while (!Users.empty()) { - User *U = Users.back(); - Users.pop_back(); - - if (Instruction *I = dyn_cast<Instruction>(U)) { - CallSite CS = CallSite::get(I); - if (CS.getInstruction() && !CS.arg_empty() && - (CS.getCalledFunction() == MallocFunc || - std::find(EqPointers.begin(), EqPointers.end(), - CS.getCalledValue()) != EqPointers.end())) { - - Value *Source = *CS.arg_begin(); - - // If no prototype was provided for malloc, we may need to cast the - // source size. - if (Source->getType() != Type::getInt32Ty(M.getContext())) - Source = - CastInst::CreateIntegerCast(Source, - Type::getInt32Ty(M.getContext()), - false/*ZExt*/, - "MallocAmtCast", I); - - MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()), - Source, "", I); - MI->takeName(I); - I->replaceAllUsesWith(MI); - - // If the old instruction was an invoke, add an unconditional branch - // before the invoke, which will become the new terminator. - if (InvokeInst *II = dyn_cast<InvokeInst>(I)) - BranchInst::Create(II->getNormalDest(), I); - - // Delete the old call site - I->eraseFromParent(); - Changed = true; - ++NumRaised; - } - } else if (GlobalValue *GV = dyn_cast<GlobalValue>(U)) { - Users.insert(Users.end(), GV->use_begin(), GV->use_end()); - EqPointers.push_back(GV); - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { - if (CE->isCast()) { - Users.insert(Users.end(), CE->use_begin(), CE->use_end()); - EqPointers.push_back(CE); - } - } - } - } - - // Next, process all free calls... + // Process all free calls... if (FreeFunc) { std::vector<User*> Users(FreeFunc->use_begin(), FreeFunc->use_end()); std::vector<Value*> EqPointers; // Values equal to FreeFunc diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 77d44b2..57aaf43 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -250,8 +250,6 @@ static bool StripDebugInfo(Module &M) { if (NMD) NMD->eraseFromParent(); - // Remove dead metadata. - M.getContext().RemoveDeadMetadata(); return true; } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index a3e3fea..42209b8 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -73,6 +73,7 @@ namespace { DenseMap<Value*,Value*> &SunkAddrs); bool OptimizeInlineAsmInst(Instruction *I, CallSite CS, DenseMap<Value*,Value*> &SunkAddrs); + bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); void findLoopBackEdges(const Function &F); }; @@ -731,6 +732,43 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS, return MadeChange; } +/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same +/// basic block as the load, unless conditions are unfavorable. This allows +/// SelectionDAG to fold the extend into the load. +/// +bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) { + // Look for a load being extended. + LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0)); + if (!LI) return false; + + // If they're already in the same block, there's nothing to do. + if (LI->getParent() == I->getParent()) + return false; + + // If the load has other users and the truncate is not free, this probably + // isn't worthwhile. + if (!LI->hasOneUse() && + TLI && !TLI->isTruncateFree(I->getType(), LI->getType())) + return false; + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa<ZExtInst>(I)) + LType = ISD::ZEXTLOAD; + else { + assert(isa<SExtInst>(I) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType()))) + return false; + + // Move the extend into the same block as the load, so that SelectionDAG + // can fold it. + I->removeFromParent(); + I->insertAfter(LI); + return true; +} + bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { BasicBlock *DefBB = I->getParent(); @@ -846,8 +884,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { MadeChange |= Change; } - if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) + if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) { + MadeChange |= MoveExtToFormExtLoad(I); MadeChange |= OptimizeExtUses(I); + } } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) { MadeChange |= OptimizeCmpExpression(CI); } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 2ed4a63..8859324 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -78,13 +78,10 @@ namespace { SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, - EMPTY, TOMBSTONE }; + INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; ExpressionOpcode opcode; const Type* type; - uint32_t firstVN; - uint32_t secondVN; - uint32_t thirdVN; SmallVector<uint32_t, 4> varargs; Value *function; @@ -100,12 +97,6 @@ namespace { return false; else if (function != other.function) return false; - else if (firstVN != other.firstVN) - return false; - else if (secondVN != other.secondVN) - return false; - else if (thirdVN != other.thirdVN) - return false; else { if (varargs.size() != other.varargs.size()) return false; @@ -146,6 +137,10 @@ namespace { Expression create_expression(GetElementPtrInst* G); Expression create_expression(CallInst* C); Expression create_expression(Constant* C); + Expression create_expression(ExtractValueInst* C); + Expression create_expression(InsertValueInst* C); + + uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } uint32_t lookup_or_add(Value *V); @@ -176,13 +171,8 @@ template <> struct DenseMapInfo<Expression> { static unsigned getHashValue(const Expression e) { unsigned hash = e.opcode; - hash = e.firstVN + hash * 37; - hash = e.secondVN + hash * 37; - hash = e.thirdVN + hash * 37; - hash = ((unsigned)((uintptr_t)e.type >> 4) ^ - (unsigned)((uintptr_t)e.type >> 9)) + - hash * 37; + (unsigned)((uintptr_t)e.type >> 9)); for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) @@ -290,9 +280,6 @@ Expression ValueTable::create_expression(CallInst* C) { Expression e; e.type = C->getType(); - e.firstVN = 0; - e.secondVN = 0; - e.thirdVN = 0; e.function = C->getCalledFunction(); e.opcode = Expression::CALL; @@ -305,10 +292,8 @@ Expression ValueTable::create_expression(CallInst* C) { Expression ValueTable::create_expression(BinaryOperator* BO) { Expression e; - - e.firstVN = lookup_or_add(BO->getOperand(0)); - e.secondVN = lookup_or_add(BO->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(BO->getOperand(0))); + e.varargs.push_back(lookup_or_add(BO->getOperand(1))); e.function = 0; e.type = BO->getType(); e.opcode = getOpcode(BO); @@ -319,9 +304,8 @@ Expression ValueTable::create_expression(BinaryOperator* BO) { Expression ValueTable::create_expression(CmpInst* C) { Expression e; - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = lookup_or_add(C->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(C->getOperand(0))); + e.varargs.push_back(lookup_or_add(C->getOperand(1))); e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); @@ -332,9 +316,7 @@ Expression ValueTable::create_expression(CmpInst* C) { Expression ValueTable::create_expression(CastInst* C) { Expression e; - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = 0; - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(C->getOperand(0))); e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); @@ -345,9 +327,9 @@ Expression ValueTable::create_expression(CastInst* C) { Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression e; - e.firstVN = lookup_or_add(S->getOperand(0)); - e.secondVN = lookup_or_add(S->getOperand(1)); - e.thirdVN = lookup_or_add(S->getOperand(2)); + e.varargs.push_back(lookup_or_add(S->getOperand(0))); + e.varargs.push_back(lookup_or_add(S->getOperand(1))); + e.varargs.push_back(lookup_or_add(S->getOperand(2))); e.function = 0; e.type = S->getType(); e.opcode = Expression::SHUFFLE; @@ -358,9 +340,8 @@ Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression ValueTable::create_expression(ExtractElementInst* E) { Expression e; - e.firstVN = lookup_or_add(E->getOperand(0)); - e.secondVN = lookup_or_add(E->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(E->getOperand(0))); + e.varargs.push_back(lookup_or_add(E->getOperand(1))); e.function = 0; e.type = E->getType(); e.opcode = Expression::EXTRACT; @@ -371,9 +352,9 @@ Expression ValueTable::create_expression(ExtractElementInst* E) { Expression ValueTable::create_expression(InsertElementInst* I) { Expression e; - e.firstVN = lookup_or_add(I->getOperand(0)); - e.secondVN = lookup_or_add(I->getOperand(1)); - e.thirdVN = lookup_or_add(I->getOperand(2)); + e.varargs.push_back(lookup_or_add(I->getOperand(0))); + e.varargs.push_back(lookup_or_add(I->getOperand(1))); + e.varargs.push_back(lookup_or_add(I->getOperand(2))); e.function = 0; e.type = I->getType(); e.opcode = Expression::INSERT; @@ -384,9 +365,9 @@ Expression ValueTable::create_expression(InsertElementInst* I) { Expression ValueTable::create_expression(SelectInst* I) { Expression e; - e.firstVN = lookup_or_add(I->getCondition()); - e.secondVN = lookup_or_add(I->getTrueValue()); - e.thirdVN = lookup_or_add(I->getFalseValue()); + e.varargs.push_back(lookup_or_add(I->getCondition())); + e.varargs.push_back(lookup_or_add(I->getTrueValue())); + e.varargs.push_back(lookup_or_add(I->getFalseValue())); e.function = 0; e.type = I->getType(); e.opcode = Expression::SELECT; @@ -397,9 +378,7 @@ Expression ValueTable::create_expression(SelectInst* I) { Expression ValueTable::create_expression(GetElementPtrInst* G) { Expression e; - e.firstVN = lookup_or_add(G->getPointerOperand()); - e.secondVN = 0; - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(G->getPointerOperand())); e.function = 0; e.type = G->getType(); e.opcode = Expression::GEP; @@ -411,6 +390,35 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) { return e; } +Expression ValueTable::create_expression(ExtractValueInst* E) { + Expression e; + + e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); + for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::EXTRACTVALUE; + + return e; +} + +Expression ValueTable::create_expression(InsertValueInst* E) { + Expression e; + + e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); + e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand())); + for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::INSERTVALUE; + + return e; +} + //===----------------------------------------------------------------------===// // ValueTable External Functions //===----------------------------------------------------------------------===// @@ -420,232 +428,197 @@ void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } -/// lookup_or_add - Returns the value number for the specified value, assigning -/// it a new number if it did not have one before. -uint32_t ValueTable::lookup_or_add(Value *V) { - DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); - if (VI != valueNumbering.end()) - return VI->second; - - if (CallInst* C = dyn_cast<CallInst>(V)) { - if (AA->doesNotAccessMemory(C)) { - Expression e = create_expression(C); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (AA->onlyReadsMemory(C)) { - Expression e = create_expression(C); - - if (expressionNumbering.find(e) == expressionNumbering.end()) { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - MemDepResult local_dep = MD->getDependency(C); - - if (!local_dep.isDef() && !local_dep.isNonLocal()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - if (local_dep.isDef()) { - CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - - if (local_cdep->getNumOperands() != C->getNumOperands()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); - if (c_vn != cd_vn) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - } - - uint32_t v = lookup_or_add(local_cdep); - valueNumbering.insert(std::make_pair(V, v)); - return v; - } - - // Non-local case. - const MemoryDependenceAnalysis::NonLocalDepInfo &deps = - MD->getNonLocalCallDependency(CallSite(C)); - // FIXME: call/call dependencies for readonly calls should return def, not - // clobber! Move the checking logic to MemDep! - CallInst* cdep = 0; - - // Check to see if we have a single dominating call instruction that is - // identical to C. - for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; - // Ignore non-local dependencies. - if (I->second.isNonLocal()) - continue; +uint32_t ValueTable::lookup_or_add_call(CallInst* C) { + if (AA->doesNotAccessMemory(C)) { + Expression exp = create_expression(C); + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } else if (AA->onlyReadsMemory(C)) { + Expression exp = create_expression(C); + uint32_t& e = expressionNumbering[exp]; + if (!e) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } - // We don't handle non-depedencies. If we already have a call, reject - // instruction dependencies. - if (I->second.isClobber() || cdep != 0) { - cdep = 0; - break; - } + MemDepResult local_dep = MD->getDependency(C); - CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); - // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ - cdep = NonLocalDepCall; - continue; - } + if (!local_dep.isDef() && !local_dep.isNonLocal()) { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } - cdep = 0; - break; - } + if (local_dep.isDef()) { + CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); - if (!cdep) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (local_cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - if (cdep->getNumOperands() != C->getNumOperands()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } for (unsigned i = 1; i < C->getNumOperands(); ++i) { uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); if (c_vn != cd_vn) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } } - uint32_t v = lookup_or_add(cdep); - valueNumbering.insert(std::make_pair(V, v)); + uint32_t v = lookup_or_add(local_cdep); + valueNumbering[C] = v; return v; - - } else { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; } - } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) { - Expression e = create_expression(BO); - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + // Non-local case. + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + MD->getNonLocalCallDependency(CallSite(C)); + // FIXME: call/call dependencies for readonly calls should return def, not + // clobber! Move the checking logic to MemDep! + CallInst* cdep = 0; + + // Check to see if we have a single dominating call instruction that is + // identical to C. + for (unsigned i = 0, e = deps.size(); i != e; ++i) { + const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; + // Ignore non-local dependencies. + if (I->second.isNonLocal()) + continue; - return nextValueNumber++; - } - } else if (CmpInst* C = dyn_cast<CmpInst>(V)) { - Expression e = create_expression(C); + // We don't handle non-depedencies. If we already have a call, reject + // instruction dependencies. + if (I->second.isClobber() || cdep != 0) { + cdep = 0; + break; + } - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); + // FIXME: All duplicated with non-local case. + if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ + cdep = NonLocalDepCall; + continue; + } - return nextValueNumber++; + cdep = 0; + break; } - } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (!cdep) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; + for (unsigned i = 1; i < C->getNumOperands(); ++i) { + uint32_t c_vn = lookup_or_add(C->getOperand(i)); + uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + if (c_vn != cd_vn) { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } } - } else if (SelectInst* U = dyn_cast<SelectInst>(V)) { - Expression e = create_expression(U); - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + uint32_t v = lookup_or_add(cdep); + valueNumbering[C] = v; + return v; - return nextValueNumber++; - } - } else if (CastInst* U = dyn_cast<CastInst>(V)) { - Expression e = create_expression(U); - - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) { - Expression e = create_expression(U); + } else { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } +} - DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); +/// lookup_or_add - Returns the value number for the specified value, assigning +/// it a new number if it did not have one before. +uint32_t ValueTable::lookup_or_add(Value *V) { + DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); + if (VI != valueNumbering.end()) + return VI->second; - return nextValueNumber++; - } - } else { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (!isa<Instruction>(V)) { + valueNumbering[V] = nextValueNumber; return nextValueNumber++; } + + Instruction* I = cast<Instruction>(V); + Expression exp; + switch (I->getOpcode()) { + case Instruction::Call: + return lookup_or_add_call(cast<CallInst>(I)); + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or : + case Instruction::Xor: + exp = create_expression(cast<BinaryOperator>(I)); + break; + case Instruction::ICmp: + case Instruction::FCmp: + exp = create_expression(cast<CmpInst>(I)); + break; + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + exp = create_expression(cast<CastInst>(I)); + break; + case Instruction::Select: + exp = create_expression(cast<SelectInst>(I)); + break; + case Instruction::ExtractElement: + exp = create_expression(cast<ExtractElementInst>(I)); + break; + case Instruction::InsertElement: + exp = create_expression(cast<InsertElementInst>(I)); + break; + case Instruction::ShuffleVector: + exp = create_expression(cast<ShuffleVectorInst>(I)); + break; + case Instruction::ExtractValue: + exp = create_expression(cast<ExtractValueInst>(I)); + break; + case Instruction::InsertValue: + exp = create_expression(cast<InsertValueInst>(I)); + break; + case Instruction::GetElementPtr: + exp = create_expression(cast<GetElementPtrInst>(I)); + break; + default: + valueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + valueNumbering[V] = e; + return e; } /// lookup - Returns the value number of the specified value. Fails if @@ -1557,15 +1530,18 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // actually have the same type. See if we know how to reuse the stored // value (depending on its type). const TargetData *TD = 0; - if (StoredVal->getType() != L->getType() && - (TD = getAnalysisIfAvailable<TargetData>())) { - StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), - L, *TD); - if (StoredVal == 0) + if (StoredVal->getType() != L->getType()) { + if ((TD = getAnalysisIfAvailable<TargetData>())) { + StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), + L, *TD); + if (StoredVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); + } + else return false; - - DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal - << '\n' << *L << "\n\n\n"); } // Remove it! @@ -1584,14 +1560,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // the same type. See if we know how to reuse the previously loaded value // (depending on its type). const TargetData *TD = 0; - if (DepLI->getType() != L->getType() && - (TD = getAnalysisIfAvailable<TargetData>())) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); - if (AvailableVal == 0) - return false; + if (DepLI->getType() != L->getType()) { + if ((TD = getAnalysisIfAvailable<TargetData>())) { + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + if (AvailableVal == 0) + return false; - DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal - << "\n" << *L << "\n\n\n"); + DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); + } + else + return false; } // Remove it! diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index f635af3..b41b5d4 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -6300,25 +6300,33 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } - case Instruction::Malloc: - // If we have (malloc != null), and if the malloc has a single use, we - // can assume it is successful and remove the malloc. - if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) { - Worklist.Add(LHSI); - return ReplaceInstUsesWith(I, - ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); - } - break; case Instruction::Call: // If we have (malloc != null), and if the malloc has a single use, we // can assume it is successful and remove the malloc. if (isMalloc(LHSI) && LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) { - Worklist.Add(LHSI); - return ReplaceInstUsesWith(I, + // Need to explicitly erase malloc call here, instead of adding it to + // Worklist, because it won't get DCE'd from the Worklist since + // isInstructionTriviallyDead() returns false for function calls. + // It is OK to replace LHSI/MallocCall with Undef because the + // instruction that uses it will be erased via Worklist. + if (extractMallocCall(LHSI)) { + LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType())); + EraseInstFromFunction(*LHSI); + return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), !I.isTrueWhenEqual())); + } + if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI)) + if (MallocCall->hasOneUse()) { + MallocCall->replaceAllUsesWith( + UndefValue::get(MallocCall->getType())); + EraseInstFromFunction(*MallocCall); + Worklist.Add(LHSI); // The malloc's bitcast use. + return ReplaceInstUsesWith(I, + ConstantInt::get(Type::getInt1Ty(*Context), + !I.isTrueWhenEqual())); + } } break; } @@ -7809,11 +7817,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); } - AllocationInst *New; - if (isa<MallocInst>(AI)) - New = AllocaBuilder.CreateMalloc(CastElTy, Amt); - else - New = AllocaBuilder.CreateAlloca(CastElTy, Amt); + AllocationInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); @@ -9270,14 +9274,44 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, return Changed ? &SI : 0; } -/// isDefinedInBB - Return true if the value is an instruction defined in the -/// specified basicblock. -static bool isDefinedInBB(const Value *V, const BasicBlock *BB) { + +/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a +/// PHI node (but the two may be in different blocks). See if the true/false +/// values (V) are live in all of the predecessor blocks of the PHI. For +/// example, cases like this cannot be mapped: +/// +/// X = phi [ C1, BB1], [C2, BB2] +/// Y = add +/// Z = select X, Y, 0 +/// +/// because Y is not live in BB1/BB2. +/// +static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, + const SelectInst &SI) { + // If the value is a non-instruction value like a constant or argument, it + // can always be mapped. const Instruction *I = dyn_cast<Instruction>(V); - return I != 0 && I->getParent() == BB; + if (I == 0) return true; + + // If V is a PHI node defined in the same block as the condition PHI, we can + // map the arguments. + const PHINode *CondPHI = cast<PHINode>(SI.getCondition()); + + if (const PHINode *VP = dyn_cast<PHINode>(I)) + if (VP->getParent() == CondPHI->getParent()) + return true; + + // Otherwise, if the PHI and select are defined in the same block and if V is + // defined in a different block, then we can transform it. + if (SI.getParent() == CondPHI->getParent() && + I->getParent() != CondPHI->getParent()) + return true; + + // Otherwise we have a 'hard' case and we can't tell without doing more + // detailed dominator based analysis, punt. + return false; } - Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -9489,16 +9523,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return FoldI; } - // See if we can fold the select into a phi node. The true/false values have - // to be live in the predecessor blocks. If they are instructions in SI's - // block, we can't map to the predecessor. - if (isa<PHINode>(SI.getCondition()) && - (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) || - isa<PHINode>(SI.getTrueValue())) && - (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) || - isa<PHINode>(SI.getFalseValue()))) - if (Instruction *NV = FoldOpIntoPhi(SI)) - return NV; + // See if we can fold the select into a phi node if the condition is a select. + if (isa<PHINode>(SI.getCondition())) + // The true/false values have to be live in the PHI predecessor's blocks. + if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && + CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) + if (Instruction *NV = FoldOpIntoPhi(SI)) + return NV; if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); @@ -11213,15 +11244,8 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { const Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocationInst *New = 0; - - // Create and insert the replacement instruction... - if (isa<MallocInst>(AI)) - New = Builder->CreateMalloc(NewTy, 0, AI.getName()); - else { - assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); - New = Builder->CreateAlloca(NewTy, 0, AI.getName()); - } + assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); + AllocationInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -11294,12 +11318,6 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { } } - // Change free(malloc) into nothing, if the malloc has a single use. - if (MallocInst *MI = dyn_cast<MallocInst>(Op)) - if (MI->hasOneUse()) { - EraseInstFromFunction(FI); - return EraseInstFromFunction(*MI); - } if (isMalloc(Op)) { if (CallInst* CI = extractMallocCallFromBitCast(Op)) { if (Op->hasOneUse() && CI->hasOneUse()) { @@ -11327,40 +11345,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, Value *CastOp = CI->getOperand(0); LLVMContext *Context = IC.getContext(); - if (TD) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) { - // Instead of loading constant c string, use corresponding integer value - // directly if string length is small enough. - std::string Str; - if (GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) { - unsigned len = Str.length(); - const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); - unsigned numBits = Ty->getPrimitiveSizeInBits(); - // Replace LI with immediate integer store. - if ((numBits >> 3) == len + 1) { - APInt StrVal(numBits, 0); - APInt SingleChar(numBits, 0); - if (TD->isLittleEndian()) { - for (signed i = len-1; i >= 0; i--) { - SingleChar = (uint64_t) Str[i] & UCHAR_MAX; - StrVal = (StrVal << 8) | SingleChar; - } - } else { - for (unsigned i = 0; i < len; i++) { - SingleChar = (uint64_t) Str[i] & UCHAR_MAX; - StrVal = (StrVal << 8) | SingleChar; - } - // Append NULL at the end. - SingleChar = 0; - StrVal = (StrVal << 8) | SingleChar; - } - Value *NL = ConstantInt::get(*Context, StrVal); - return IC.ReplaceInstUsesWith(LI, NL); - } - } - } - } - const PointerType *DestTy = cast<PointerType>(CI->getType()); const Type *DestPTy = DestTy->getElementType(); if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { @@ -11380,7 +11364,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; - Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context)); + Idxs[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); + Idxs[1] = Idxs[0]; CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); @@ -11436,6 +11421,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) return ReplaceInstUsesWith(LI, AvailableVal); + // load(gep null, ...) -> unreachable if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { const Value *GEPI0 = GEPI->getOperand(0); // TODO: Consider a target hook for valid address spaces for this xform. @@ -11450,60 +11436,24 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } - if (Constant *C = dyn_cast<Constant>(Op)) { - // load null/undef -> undef - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa<UndefValue>(C) || - (C->isNullValue() && LI.getPointerAddressSpace() == 0)) { - // Insert a new store to null instruction before the load to indicate that - // this code is not reachable. We do this instead of inserting an - // unreachable instruction directly because we cannot modify the CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - - // Instcombine load (constant global) into the value loaded. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op)) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) - return ReplaceInstUsesWith(LI, GV->getInitializer()); - - // Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) { - if (CE->getOpcode() == Instruction::GetElementPtr) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) - if (Constant *V = - ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) - return ReplaceInstUsesWith(LI, V); - if (CE->getOperand(0)->isNullValue()) { - // Insert a new store to null instruction before the load to indicate - // that this code is not reachable. We do this instead of inserting - // an unreachable instruction directly because we cannot modify the - // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - - } else if (CE->isCast()) { - if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) - return Res; - } - } - } - - // If this load comes from anywhere in a constant global, and if the global - // is all undef or zero, we know what it loads. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){ - if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - if (GV->getInitializer()->isNullValue()) - return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType())); - else if (isa<UndefValue>(GV->getInitializer())) - return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } + // load null/undef -> unreachable + // TODO: Consider a target hook for valid address spaces for this xform. + if (isa<UndefValue>(Op) || + (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { + // Insert a new store to null instruction before the load to indicate that + // this code is not reachable. We do this instead of inserting an + // unreachable instruction directly because we cannot modify the CFG. + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } + // Instcombine load (constantexpr_cast global) -> cast (load global) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) + if (CE->isCast()) + if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) + return Res; + if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index f6de362..223d2b9 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -138,7 +138,6 @@ namespace { void SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks); bool UnswitchIfProfitable(Value *LoopCond, Constant *Val); - unsigned getLoopUnswitchCost(Value *LIC); void UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock); void UnswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L); @@ -400,25 +399,6 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, return true; } -/// getLoopUnswitchCost - Return the cost (code size growth) that will happen if -/// we choose to unswitch current loop on the specified value. -/// -unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) { - // If the condition is trivial, always unswitch. There is no code growth for - // this case. - if (IsTrivialUnswitchCondition(LIC)) - return 0; - - // FIXME: This is overly conservative because it does not take into - // consideration code simplification opportunities. - CodeMetrics Metrics; - for (Loop::block_iterator I = currentLoop->block_begin(), - E = currentLoop->block_end(); - I != E; ++I) - Metrics.analyzeBasicBlock(*I); - return Metrics.NumInsts; -} - /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. @@ -427,24 +407,35 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ initLoopData(); Function *F = loopHeader->getParent(); + // If the condition is trivial, always unswitch. There is no code growth for + // this case. + if (!IsTrivialUnswitchCondition(LoopCond)) { + // Check to see if it would be profitable to unswitch current loop. - // Check to see if it would be profitable to unswitch current loop. - unsigned Cost = getLoopUnswitchCost(LoopCond); - - // Do not do non-trivial unswitch while optimizing for size. - if (Cost && OptimizeForSize) - return false; - if (Cost && !F->isDeclaration() && F->hasFnAttr(Attribute::OptimizeForSize)) - return false; + // Do not do non-trivial unswitch while optimizing for size. + if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) + return false; - if (Cost > Threshold) { - // FIXME: this should estimate growth by the amount of code shared by the - // resultant unswitched loops. - // - DEBUG(errs() << "NOT unswitching loop %" - << currentLoop->getHeader()->getName() << ", cost too high: " - << currentLoop->getBlocks().size() << "\n"); - return false; + // FIXME: This is overly conservative because it does not take into + // consideration code simplification opportunities and code that can + // be shared by the resultant unswitched loops. + CodeMetrics Metrics; + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); + I != E; ++I) + Metrics.analyzeBasicBlock(*I); + + // Limit the number of instructions to avoid causing significant code + // expansion, and the number of basic blocks, to avoid loops with + // large numbers of branches which cause loop unswitching to go crazy. + // This is a very ad-hoc heuristic. + if (Metrics.NumInsts > Threshold || + Metrics.NumBlocks * 5 > Threshold) { + DEBUG(errs() << "NOT unswitching loop %" + << currentLoop->getHeader()->getName() << ", cost too high: " + << currentLoop->getBlocks().size() << "\n"); + return false; + } } Constant *CondVal; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index e6ffac2..af29f97 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -121,7 +121,6 @@ static bool isUnmovableInstruction(Instruction *I) { if (I->getOpcode() == Instruction::PHI || I->getOpcode() == Instruction::Alloca || I->getOpcode() == Instruction::Load || - I->getOpcode() == Instruction::Malloc || I->getOpcode() == Instruction::Invoke || (I->getOpcode() == Instruction::Call && !isa<DbgInfoIntrinsic>(I)) || diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index b5edf4e..b745097 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1229,7 +1229,10 @@ CallOverdefined: TMRVI = TrackedMultipleRetVals.find(std::make_pair(F, 0)); if (TMRVI == TrackedMultipleRetVals.end()) goto CallOverdefined; - + + // Need to mark as overdefined, otherwise it stays undefined which + // creates extractvalue undef, <idx> + markOverdefined(I); // If we are tracking this callee, propagate the return values of the call // into this call site. We do this by walking all the uses. Single-index // ExtractValueInst uses can be tracked; anything more complicated is @@ -1271,7 +1274,6 @@ CallOverdefined: } } - void SCCPSolver::Solve() { // Process the work lists until they are empty! while (!BBWorkList.empty() || !InstWorkList.empty() || diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 4931ab3..35907fd 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -425,14 +425,26 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, if (L) { if (IsLoopEntry) { - if (Loop *PredLoop = LI->getLoopFor(Preds[0])) { - // Add the new block to the nearest enclosing loop (and not an - // adjacent loop). - while (PredLoop && !PredLoop->contains(BB)) - PredLoop = PredLoop->getParentLoop(); - if (PredLoop) - PredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - } + // Add the new block to the nearest enclosing loop (and not an + // adjacent loop). To find this, examine each of the predecessors and + // determine which loops enclose them, and select the most-nested loop + // which contains the loop containing the block being split. + Loop *InnermostPredLoop = 0; + for (unsigned i = 0; i != NumPreds; ++i) + if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { + // Seek a loop which actually contains the block being split (to + // avoid adjacent loops). + while (PredLoop && !PredLoop->contains(BB)) + PredLoop = PredLoop->getParentLoop(); + // Select the most-nested of these loops which contains the block. + if (PredLoop && + PredLoop->contains(BB) && + (!InnermostPredLoop || + InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) + InnermostPredLoop = PredLoop; + } + if (InnermostPredLoop) + InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0d00d69..619c939 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -444,18 +444,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. - Constant *StackSave, *StackRestore; - StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); - StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; if (CG) { - // We know that StackSave/StackRestore are Function*'s, because they are - // intrinsics which must have the right types. - StackSaveCGN = CG->getOrInsertFunction(cast<Function>(StackSave)); - StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore)); + StackSaveCGN = CG->getOrInsertFunction(StackSave); + StackRestoreCGN = CG->getOrInsertFunction(StackRestore); CallerNode = (*CG)[Caller]; } @@ -480,7 +477,8 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { - CallInst::Create(StackRestore, SavedPtr, "", UI); + CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI); + if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); ++NumStackRestores; } } diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 1fa51a3..7f11acf 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -33,11 +33,11 @@ namespace { for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext())) - AI->setName("tmp"); + AI->setName("arg"); for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!BB->hasName()) - BB->setName("BB"); + BB->setName("bb"); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext())) diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp index f26d7c1..9c9113d 100644 --- a/lib/Transforms/Utils/LowerAllocations.cpp +++ b/lib/Transforms/Utils/LowerAllocations.cpp @@ -1,4 +1,4 @@ -//===- LowerAllocations.cpp - Reduce malloc & free insts to calls ---------===// +//===- LowerAllocations.cpp - Reduce free insts to calls ------------------===// // // The LLVM Compiler Infrastructure // @@ -29,18 +29,15 @@ using namespace llvm; STATISTIC(NumLowered, "Number of allocations lowered"); namespace { - /// LowerAllocations - Turn malloc and free instructions into @malloc and - /// @free calls. + /// LowerAllocations - Turn free instructions into @free calls. /// class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass { Constant *FreeFunc; // Functions in the module we are processing // Initialized by doInitialization - bool LowerMallocArgToInteger; public: static char ID; // Pass ID, replacement for typeid - explicit LowerAllocations(bool LowerToInt = false) - : BasicBlockPass(&ID), FreeFunc(0), - LowerMallocArgToInteger(LowerToInt) {} + explicit LowerAllocations() + : BasicBlockPass(&ID), FreeFunc(0) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetData>(); @@ -54,7 +51,7 @@ namespace { } /// doPassInitialization - For the lower allocations pass, this ensures that - /// a module contains a declaration for a malloc and a free function. + /// a module contains a declaration for a free function. /// bool doInitialization(Module &M); @@ -76,13 +73,13 @@ X("lowerallocs", "Lower allocations from instructions to calls"); // Publically exposed interface to pass... const PassInfo *const llvm::LowerAllocationsID = &X; // createLowerAllocationsPass - Interface to this file... -Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) { - return new LowerAllocations(LowerMallocArgToInteger); +Pass *llvm::createLowerAllocationsPass() { + return new LowerAllocations(); } // doInitialization - For the lower allocations pass, this ensures that a -// module contains a declaration for a malloc and a free function. +// module contains a declaration for a free function. // // This function is always successful. // @@ -102,25 +99,9 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { BasicBlock::InstListType &BBIL = BB.getInstList(); - const TargetData &TD = getAnalysis<TargetData>(); - const Type *IntPtrTy = TD.getIntPtrType(BB.getContext()); - - // Loop over all of the instructions, looking for malloc or free instructions + // Loop over all of the instructions, looking for free instructions for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) { - if (MallocInst *MI = dyn_cast<MallocInst>(I)) { - Value *ArraySize = MI->getOperand(0); - if (ArraySize->getType() != IntPtrTy) - ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, - false /*ZExt*/, "", I); - Value *MCast = CallInst::CreateMalloc(I, IntPtrTy, - MI->getAllocatedType(), ArraySize); - - // Replace all uses of the old malloc inst with the cast inst - MI->replaceAllUsesWith(MCast); - I = --BBIL.erase(I); // remove and delete the malloc instr... - Changed = true; - ++NumLowered; - } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) { + if (FreeInst *FI = dyn_cast<FreeInst>(I)) { Value *PtrCast = new BitCastInst(FI->getOperand(0), Type::getInt8PtrTy(BB.getContext()), "", I); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 780ee26..8a07c35 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -48,7 +48,7 @@ void SSAUpdater::Initialize(Value *ProtoValue) { AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - + if (IPI == 0) IPI = new IncomingPredInfoTy(); else @@ -104,12 +104,12 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // GetValueAtEndOfBlock to do our work. if (!getAvailableVals(AV).count(BB)) return GetValueAtEndOfBlock(BB); - + // Otherwise, we have the hard case. Get the live-in values for each // predecessor. SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; Value *SingularValue = 0; - + // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one // of them to get the predecessor list instead. @@ -118,7 +118,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { BasicBlock *PredBB = SomePhi->getIncomingBlock(i); Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); - + // Compute SingularValue. if (i == 0) SingularValue = PredVal; @@ -131,7 +131,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { BasicBlock *PredBB = *PI; Value *PredVal = GetValueAtEndOfBlock(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); - + // Compute SingularValue. if (isFirstPred) { SingularValue = PredVal; @@ -140,25 +140,25 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { SingularValue = 0; } } - + // If there are no predecessors, just return undef. if (PredValues.empty()) return UndefValue::get(PrototypeValue->getType()); - + // Otherwise, if all the merged values are the same, just use it. if (SingularValue != 0) return SingularValue; - + // Otherwise, we do need a PHI: insert one now. PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), &BB->front()); InsertedPHI->reserveOperandSpace(PredValues.size()); - + // Fill in all the predecessors of the PHI. for (unsigned i = 0, e = PredValues.size(); i != e; ++i) InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); - + // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. if (Value *ConstVal = InsertedPHI->hasConstantValue()) { @@ -168,7 +168,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); - + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); return InsertedPHI; } @@ -177,11 +177,14 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { /// which use their value in the corresponding predecessor. void SSAUpdater::RewriteUse(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); - BasicBlock *UseBB = User->getParent(); - if (PHINode *UserPN = dyn_cast<PHINode>(User)) - UseBB = UserPN->getIncomingBlock(U); - U.set(GetValueInMiddleOfBlock(UseBB)); + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueInMiddleOfBlock(User->getParent()); + + U.set(V); } @@ -192,11 +195,11 @@ void SSAUpdater::RewriteUse(Use &U) { /// Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); - + // Query AvailableVals by doing an insertion of null. std::pair<AvailableValsTy::iterator, bool> InsertRes = AvailableVals.insert(std::make_pair(BB, WeakVH())); - + // Handle the case when the insertion fails because we have already seen BB. if (!InsertRes.second) { // If the insertion failed, there are two cases. The first case is that the @@ -204,7 +207,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { // return the value. if (InsertRes.first->second != 0) return InsertRes.first->second; - + // Otherwise, if the value we find is null, then this is the value is not // known but it is being computed elsewhere in our recursion. This means // that we have a cycle. Handle this by inserting a PHI node and returning @@ -214,7 +217,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), &BB->front()); } - + // Okay, the value isn't in the map and we just inserted a null in the entry // to indicate that we're processing the block. Since we have no idea what // value is in this block, we have to recurse through our predecessors. @@ -225,13 +228,13 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { // of the recursion, just use IncomingPredInfo as an explicit stack. IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); unsigned FirstPredInfoEntry = IncomingPredInfo.size(); - + // As we're walking the predecessors, keep track of whether they are all // producing the same value. If so, this value will capture it, if not, it // will get reset to null. We distinguish the no-predecessor case explicitly // below. TrackingVH<Value> SingularValue; - + // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one // of them to get the predecessor list instead. @@ -240,7 +243,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { BasicBlock *PredBB = SomePhi->getIncomingBlock(i); Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - + // Compute SingularValue. if (i == 0) SingularValue = PredVal; @@ -253,7 +256,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { BasicBlock *PredBB = *PI; Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - + // Compute SingularValue. if (isFirstPred) { SingularValue = PredVal; @@ -262,19 +265,19 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { SingularValue = 0; } } - + // If there are no predecessors, then we must have found an unreachable block // just return 'undef'. Since there are no predecessors, InsertRes must not // be invalidated. if (IncomingPredInfo.size() == FirstPredInfoEntry) return InsertRes.first->second = UndefValue::get(PrototypeValue->getType()); - + /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If /// this block is involved in a loop, a no-entry PHI node will have been /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted /// above. TrackingVH<Value> &InsertedVal = AvailableVals[BB]; - + // If all the predecessor values are the same then we don't need to insert a // PHI. This is the simple and common case. if (SingularValue) { @@ -291,31 +294,31 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { } else { InsertedVal = SingularValue; } - + // Drop the entries we added in IncomingPredInfo to restore the stack. IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, IncomingPredInfo.end()); return InsertedVal; } - + // Otherwise, we do need a PHI: insert one now if we don't already have one. if (InsertedVal == 0) InsertedVal = PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), &BB->front()); - + PHINode *InsertedPHI = cast<PHINode>(InsertedVal); InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry); - + // Fill in all the predecessors of the PHI. for (IncomingPredInfoTy::iterator I = IncomingPredInfo.begin()+FirstPredInfoEntry, E = IncomingPredInfo.end(); I != E; ++I) InsertedPHI->addIncoming(I->second, I->first); - + // Drop the entries we added in IncomingPredInfo to restore the stack. IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, IncomingPredInfo.end()); - + // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. if (Value *ConstVal = InsertedPHI->hasConstantValue()) { @@ -324,12 +327,10 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { InsertedVal = ConstVal; } else { DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); - + // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); } - + return InsertedVal; } - - diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index b5ae81b..d8a708d 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -680,6 +680,8 @@ void SlotTracker::processFunction() { ST_DEBUG("Inserting Instructions:\n"); MetadataContext &TheMetadata = TheFunction->getContext().getMetadata(); + typedef SmallVector<std::pair<unsigned, TrackingVH<MDNode> >, 2> MDMapTy; + MDMapTy MDs; // Add all of the basic blocks and instructions with no names. for (Function::const_iterator BB = TheFunction->begin(), @@ -696,12 +698,11 @@ void SlotTracker::processFunction() { CreateMetadataSlot(N); // Process metadata attached with this instruction. - const MetadataContext::MDMapTy *MDs = TheMetadata.getMDs(I); - if (MDs) - for (MetadataContext::MDMapTy::const_iterator MI = MDs->begin(), - ME = MDs->end(); MI != ME; ++MI) - if (MDNode *MDN = dyn_cast_or_null<MDNode>(MI->second)) - CreateMetadataSlot(MDN); + MDs.clear(); + TheMetadata.getMDs(I, MDs); + for (MDMapTy::const_iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; + ++MI) + CreateMetadataSlot(MI->second); } } @@ -818,9 +819,8 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) { unsigned DestSlot = mdnNext++; mdnMap[N] = DestSlot; - for (MDNode::const_elem_iterator MDI = N->elem_begin(), - MDE = N->elem_end(); MDI != MDE; ++MDI) { - const Value *TV = *MDI; + for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) { + const Value *TV = N->getElement(i); if (TV) if (const MDNode *N2 = dyn_cast<MDNode>(TV)) CreateMetadataSlot(N2); @@ -906,9 +906,8 @@ static void WriteMDNodes(formatted_raw_ostream &Out, TypePrinting &TypePrinter, Out << '!' << i << " = metadata "; const MDNode *Node = Nodes[i]; Out << "!{"; - for (MDNode::const_elem_iterator NI = Node->elem_begin(), - NE = Node->elem_end(); NI != NE;) { - const Value *V = *NI; + for (unsigned mi = 0, me = Node->getNumElements(); mi != me; ++mi) { + const Value *V = Node->getElement(mi); if (!V) Out << "null"; else if (const MDNode *N = dyn_cast<MDNode>(V)) { @@ -916,11 +915,12 @@ static void WriteMDNodes(formatted_raw_ostream &Out, TypePrinting &TypePrinter, Out << '!' << Machine.getMetadataSlot(N); } else { - TypePrinter.print((*NI)->getType(), Out); + TypePrinter.print(V->getType(), Out); Out << ' '; - WriteAsOperandInternal(Out, *NI, &TypePrinter, &Machine); + WriteAsOperandInternal(Out, Node->getElement(mi), + &TypePrinter, &Machine); } - if (++NI != NE) + if (mi + 1 != me) Out << ", "; } @@ -1206,8 +1206,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, Out << "asm "; if (IA->hasSideEffects()) Out << "sideeffect "; - if (IA->isMsAsm()) - Out << "msasm "; + if (IA->isAlignStack()) + Out << "alignstack "; Out << '"'; PrintEscapedString(IA->getAsmString(), Out); Out << "\", \""; @@ -1296,7 +1296,7 @@ class AssemblyWriter { TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; std::vector<const Type*> NumberedTypes; - DenseMap<unsigned, const char *> MDNames; + DenseMap<unsigned, StringRef> MDNames; public: inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, @@ -1307,11 +1307,12 @@ public: // FIXME: Provide MDPrinter if (M) { MetadataContext &TheMetadata = M->getContext().getMetadata(); - const StringMap<unsigned> *Names = TheMetadata.getHandlerNames(); - for (StringMapConstIterator<unsigned> I = Names->begin(), - E = Names->end(); I != E; ++I) { - const StringMapEntry<unsigned> &Entry = *I; - MDNames[I->second] = Entry.getKeyData(); + SmallVector<std::pair<unsigned, StringRef>, 4> Names; + TheMetadata.getHandlerNames(Names); + for (SmallVector<std::pair<unsigned, StringRef>, 4>::iterator + I = Names.begin(), + E = Names.end(); I != E; ++I) { + MDNames[I->first] = I->second; } } } @@ -2035,13 +2036,13 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Print Metadata info if (!MDNames.empty()) { MetadataContext &TheMetadata = I.getContext().getMetadata(); - const MetadataContext::MDMapTy *MDMap = TheMetadata.getMDs(&I); - if (MDMap) - for (MetadataContext::MDMapTy::const_iterator MI = MDMap->begin(), - ME = MDMap->end(); MI != ME; ++MI) - if (const MDNode *MD = dyn_cast_or_null<MDNode>(MI->second)) - Out << ", !" << MDNames[MI->first] - << " !" << Machine.getMetadataSlot(MD); + typedef SmallVector<std::pair<unsigned, TrackingVH<MDNode> >, 2> MDMapTy; + MDMapTy MDs; + TheMetadata.getMDs(&I, MDs); + for (MDMapTy::const_iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; + ++MI) + Out << ", !" << MDNames[MI->first] + << " !" << Machine.getMetadataSlot(MI->second); } printInfoComment(I); } diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index c1fcc5f..2c0a67f 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -179,6 +179,151 @@ static Constant *FoldBitCast(LLVMContext &Context, } +/// ExtractConstantBytes - V is an integer constant which only has a subset of +/// its bytes used. The bytes used are indicated by ByteStart (which is the +/// first byte used, counting from the least significant byte) and ByteSize, +/// which is the number of bytes used. +/// +/// This function analyzes the specified constant to see if the specified byte +/// range can be returned as a simplified constant. If so, the constant is +/// returned, otherwise null is returned. +/// +static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, + unsigned ByteSize) { + assert(isa<IntegerType>(C->getType()) && + (cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 && + "Non-byte sized integer input"); + unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8; + assert(ByteSize && "Must be accessing some piece"); + assert(ByteStart+ByteSize <= CSize && "Extracting invalid piece from input"); + assert(ByteSize != CSize && "Should not extract everything"); + + // Constant Integers are simple. + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { + APInt V = CI->getValue(); + if (ByteStart) + V = V.lshr(ByteStart*8); + V.trunc(ByteSize*8); + return ConstantInt::get(CI->getContext(), V); + } + + // In the input is a constant expr, we might be able to recursively simplify. + // If not, we definitely can't do anything. + ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (CE == 0) return 0; + + switch (CE->getOpcode()) { + default: return 0; + case Instruction::Or: { + Constant *RHS = ExtractConstantBytes(C->getOperand(1), ByteStart, ByteSize); + if (RHS == 0) + return 0; + + // X | -1 -> -1. + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) + if (RHSC->isAllOnesValue()) + return RHSC; + + Constant *LHS = ExtractConstantBytes(C->getOperand(0), ByteStart, ByteSize); + if (LHS == 0) + return 0; + return ConstantExpr::getOr(LHS, RHS); + } + case Instruction::And: { + Constant *RHS = ExtractConstantBytes(C->getOperand(1), ByteStart, ByteSize); + if (RHS == 0) + return 0; + + // X & 0 -> 0. + if (RHS->isNullValue()) + return RHS; + + Constant *LHS = ExtractConstantBytes(C->getOperand(0), ByteStart, ByteSize); + if (LHS == 0) + return 0; + return ConstantExpr::getAnd(LHS, RHS); + } + case Instruction::LShr: { + ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1)); + if (Amt == 0) + return 0; + unsigned ShAmt = Amt->getZExtValue(); + // Cannot analyze non-byte shifts. + if ((ShAmt & 7) != 0) + return 0; + ShAmt >>= 3; + + // If the extract is known to be all zeros, return zero. + if (ByteStart >= CSize-ShAmt) + return Constant::getNullValue(IntegerType::get(CE->getContext(), + ByteSize*8)); + // If the extract is known to be fully in the input, extract it. + if (ByteStart+ByteSize+ShAmt <= CSize) + return ExtractConstantBytes(C->getOperand(0), ByteStart+ShAmt, ByteSize); + + // TODO: Handle the 'partially zero' case. + return 0; + } + + case Instruction::Shl: { + ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1)); + if (Amt == 0) + return 0; + unsigned ShAmt = Amt->getZExtValue(); + // Cannot analyze non-byte shifts. + if ((ShAmt & 7) != 0) + return 0; + ShAmt >>= 3; + + // If the extract is known to be all zeros, return zero. + if (ByteStart+ByteSize <= ShAmt) + return Constant::getNullValue(IntegerType::get(CE->getContext(), + ByteSize*8)); + // If the extract is known to be fully in the input, extract it. + if (ByteStart >= ShAmt) + return ExtractConstantBytes(C->getOperand(0), ByteStart-ShAmt, ByteSize); + + // TODO: Handle the 'partially zero' case. + return 0; + } + + case Instruction::ZExt: { + unsigned SrcBitSize = + cast<IntegerType>(C->getOperand(0)->getType())->getBitWidth(); + + // If extracting something that is completely zero, return 0. + if (ByteStart*8 >= SrcBitSize) + return Constant::getNullValue(IntegerType::get(CE->getContext(), + ByteSize*8)); + + // If exactly extracting the input, return it. + if (ByteStart == 0 && ByteSize*8 == SrcBitSize) + return C->getOperand(0); + + // If extracting something completely in the input, if if the input is a + // multiple of 8 bits, recurse. + if ((SrcBitSize&7) == 0 && (ByteStart+ByteSize)*8 <= SrcBitSize) + return ExtractConstantBytes(C->getOperand(0), ByteStart, ByteSize); + + // Otherwise, if extracting a subset of the input, which is not multiple of + // 8 bits, do a shift and trunc to get the bits. + if ((ByteStart+ByteSize)*8 < SrcBitSize) { + assert((SrcBitSize&7) && "Shouldn't get byte sized case here"); + Constant *Res = C->getOperand(0); + if (ByteStart) + Res = ConstantExpr::getLShr(Res, + ConstantInt::get(Res->getType(), ByteStart*8)); + return ConstantExpr::getTrunc(Res, IntegerType::get(C->getContext(), + ByteSize*8)); + } + + // TODO: Handle the 'partially zero' case. + return 0; + } + } +} + + Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, unsigned opc, Constant *V, const Type *DestTy) { @@ -236,6 +381,8 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, // We actually have to do a cast now. Perform the cast according to the // opcode specified. switch (opc) { + default: + llvm_unreachable("Failed to cast constant expression"); case Instruction::FPTrunc: case Instruction::FPExt: if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) { @@ -300,23 +447,27 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, return ConstantInt::get(Context, Result); } return 0; - case Instruction::Trunc: + case Instruction::Trunc: { + uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth(); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth(); APInt Result(CI->getValue()); - Result.trunc(BitWidth); + Result.trunc(DestBitWidth); return ConstantInt::get(Context, Result); } + + // The input must be a constantexpr. See if we can simplify this based on + // the bytes we are demanding. Only do this if the source and dest are an + // even multiple of a byte. + if ((DestBitWidth & 7) == 0 && + (cast<IntegerType>(V->getType())->getBitWidth() & 7) == 0) + if (Constant *Res = ExtractConstantBytes(V, 0, DestBitWidth / 8)) + return Res; + return 0; + } case Instruction::BitCast: return FoldBitCast(Context, V, DestTy); - default: - assert(!"Invalid CE CastInst opcode"); - break; } - - llvm_unreachable("Failed to cast constant expression"); - return 0; } Constant *llvm::ConstantFoldSelectInstruction(LLVMContext&, diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 529c455..02c3352 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -232,7 +232,6 @@ ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V) ConstantInt* ConstantInt::getTrue(LLVMContext &Context) { LLVMContextImpl *pImpl = Context.pImpl; - sys::SmartScopedWriter<true>(pImpl->ConstantsLock); if (pImpl->TheTrueVal) return pImpl->TheTrueVal; else @@ -242,7 +241,6 @@ ConstantInt* ConstantInt::getTrue(LLVMContext &Context) { ConstantInt* ConstantInt::getFalse(LLVMContext &Context) { LLVMContextImpl *pImpl = Context.pImpl; - sys::SmartScopedWriter<true>(pImpl->ConstantsLock); if (pImpl->TheFalseVal) return pImpl->TheFalseVal; else @@ -261,22 +259,9 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) { const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth()); // get an existing value or the insertion position DenseMapAPIntKeyInfo::KeyTy Key(V, ITy); - - Context.pImpl->ConstantsLock.reader_acquire(); ConstantInt *&Slot = Context.pImpl->IntConstants[Key]; - Context.pImpl->ConstantsLock.reader_release(); - - if (!Slot) { - sys::SmartScopedWriter<true> Writer(Context.pImpl->ConstantsLock); - ConstantInt *&NewSlot = Context.pImpl->IntConstants[Key]; - if (!Slot) { - NewSlot = new ConstantInt(ITy, V); - } - - return NewSlot; - } else { - return Slot; - } + if (!Slot) Slot = new ConstantInt(ITy, V); + return Slot; } Constant* ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) { @@ -405,32 +390,24 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) { LLVMContextImpl* pImpl = Context.pImpl; - pImpl->ConstantsLock.reader_acquire(); ConstantFP *&Slot = pImpl->FPConstants[Key]; - pImpl->ConstantsLock.reader_release(); if (!Slot) { - sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock); - ConstantFP *&NewSlot = pImpl->FPConstants[Key]; - if (!NewSlot) { - const Type *Ty; - if (&V.getSemantics() == &APFloat::IEEEsingle) - Ty = Type::getFloatTy(Context); - else if (&V.getSemantics() == &APFloat::IEEEdouble) - Ty = Type::getDoubleTy(Context); - else if (&V.getSemantics() == &APFloat::x87DoubleExtended) - Ty = Type::getX86_FP80Ty(Context); - else if (&V.getSemantics() == &APFloat::IEEEquad) - Ty = Type::getFP128Ty(Context); - else { - assert(&V.getSemantics() == &APFloat::PPCDoubleDouble && - "Unknown FP format"); - Ty = Type::getPPC_FP128Ty(Context); - } - NewSlot = new ConstantFP(Ty, V); + const Type *Ty; + if (&V.getSemantics() == &APFloat::IEEEsingle) + Ty = Type::getFloatTy(Context); + else if (&V.getSemantics() == &APFloat::IEEEdouble) + Ty = Type::getDoubleTy(Context); + else if (&V.getSemantics() == &APFloat::x87DoubleExtended) + Ty = Type::getX86_FP80Ty(Context); + else if (&V.getSemantics() == &APFloat::IEEEquad) + Ty = Type::getFP128Ty(Context); + else { + assert(&V.getSemantics() == &APFloat::PPCDoubleDouble && + "Unknown FP format"); + Ty = Type::getPPC_FP128Ty(Context); } - - return NewSlot; + Slot = new ConstantFP(Ty, V); } return Slot; @@ -1908,7 +1885,6 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, Replacement = ConstantAggregateZero::get(getType()); } else { // Check to see if we have this array type already. - sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock); bool Exists; LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I = pImpl->ArrayConstants.InsertOrGetItem(Lookup, Exists); @@ -1987,7 +1963,6 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, Replacement = ConstantAggregateZero::get(getType()); } else { // Check to see if we have this array type already. - sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock); bool Exists; LLVMContextImpl::StructConstantsTy::MapTy::iterator I = pImpl->StructConstants.InsertOrGetItem(Lookup, Exists); diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index bff3087..a28037d 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -885,9 +885,9 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, const char *Constraints, int HasSideEffects, - int IsMsAsm) { + int IsAlignStack) { return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString, - Constraints, HasSideEffects, IsMsAsm)); + Constraints, HasSideEffects, IsAlignStack)); } /*--.. Operations on global variables, functions, and aliases (globals) ....--*/ @@ -1699,12 +1699,18 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { - return wrap(unwrap(B)->CreateMalloc(unwrap(Ty), 0, Name)); + const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); + return wrap(unwrap(B)->Insert(CallInst::CreateMalloc( + unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), 0, 0, ""), + Twine(Name))); } LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name) { - return wrap(unwrap(B)->CreateMalloc(unwrap(Ty), unwrap(Val), Name)); + const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); + return wrap(unwrap(B)->Insert(CallInst::CreateMalloc( + unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), unwrap(Val), 0, ""), + Twine(Name))); } LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty, diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index 0520dfa..3a36a1b 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -28,18 +28,20 @@ InlineAsm::~InlineAsm() { InlineAsm *InlineAsm::get(const FunctionType *Ty, const StringRef &AsmString, const StringRef &Constraints, bool hasSideEffects, - bool isMsAsm) { + bool isAlignStack) { // FIXME: memoize! - return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects, isMsAsm); + return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects, + isAlignStack); } InlineAsm::InlineAsm(const FunctionType *Ty, const StringRef &asmString, const StringRef &constraints, bool hasSideEffects, - bool isMsAsm) + bool isAlignStack) : Value(PointerType::getUnqual(Ty), Value::InlineAsmVal), AsmString(asmString), - Constraints(constraints), HasSideEffects(hasSideEffects), IsMsAsm(isMsAsm) { + Constraints(constraints), HasSideEffects(hasSideEffects), + IsAlignStack(isAlignStack) { // Do various checks on the constraint string and type. assert(Verify(Ty, constraints) && "Function type not legal for constraints!"); diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 4df536e..dd8a543 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -127,7 +127,6 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case Xor: return "xor"; // Memory instructions... - case Malloc: return "malloc"; case Free: return "free"; case Alloca: return "alloca"; case Load: return "load"; @@ -442,7 +441,6 @@ bool Instruction::isSafeToSpeculativelyExecute() const { // overflow-checking arithmetic, etc.) case VAArg: case Alloca: - case Malloc: case Invoke: case PHI: case Store: diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index f3d15cb..e212d5c 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -460,9 +460,10 @@ static Value *checkArraySize(Value *Amt, const Type *IntPtrTy) { return Amt; } -static Value *createMalloc(Instruction *InsertBefore, BasicBlock *InsertAtEnd, - const Type *IntPtrTy, const Type *AllocTy, - Value *ArraySize, const Twine &NameStr) { +static Instruction *createMalloc(Instruction *InsertBefore, + BasicBlock *InsertAtEnd, const Type *IntPtrTy, + const Type *AllocTy, Value *ArraySize, + Function *MallocF, const Twine &NameStr) { assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) && "createMalloc needs either InsertBefore or InsertAtEnd"); @@ -499,27 +500,34 @@ static Value *createMalloc(Instruction *InsertBefore, BasicBlock *InsertAtEnd, BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd; Module* M = BB->getParent()->getParent(); const Type *BPTy = Type::getInt8PtrTy(BB->getContext()); - // prototype malloc as "void *malloc(size_t)" - Constant *MallocF = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL); - if (!cast<Function>(MallocF)->doesNotAlias(0)) - cast<Function>(MallocF)->setDoesNotAlias(0); + if (!MallocF) + // prototype malloc as "void *malloc(size_t)" + MallocF = cast<Function>(M->getOrInsertFunction("malloc", BPTy, + IntPtrTy, NULL)); + if (!MallocF->doesNotAlias(0)) MallocF->setDoesNotAlias(0); const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy); CallInst *MCall = NULL; - Value *MCast = NULL; + Instruction *Result = NULL; if (InsertBefore) { MCall = CallInst::Create(MallocF, AllocSize, "malloccall", InsertBefore); - // Create a cast instruction to convert to the right type... - MCast = new BitCastInst(MCall, AllocPtrType, NameStr, InsertBefore); + Result = MCall; + if (Result->getType() != AllocPtrType) + // Create a cast instruction to convert to the right type... + Result = new BitCastInst(MCall, AllocPtrType, NameStr, InsertBefore); } else { - MCall = CallInst::Create(MallocF, AllocSize, "malloccall", InsertAtEnd); - // Create a cast instruction to convert to the right type... - MCast = new BitCastInst(MCall, AllocPtrType, NameStr); + MCall = CallInst::Create(MallocF, AllocSize, "malloccall"); + Result = MCall; + if (Result->getType() != AllocPtrType) { + InsertAtEnd->getInstList().push_back(MCall); + // Create a cast instruction to convert to the right type... + Result = new BitCastInst(MCall, AllocPtrType, NameStr); + } } MCall->setTailCall(); assert(MCall->getType() != Type::getVoidTy(BB->getContext()) && "Malloc has void return type"); - return MCast; + return Result; } /// CreateMalloc - Generate the IR for a call to malloc: @@ -528,10 +536,11 @@ static Value *createMalloc(Instruction *InsertBefore, BasicBlock *InsertAtEnd, /// constant 1. /// 2. Call malloc with that argument. /// 3. Bitcast the result of the malloc call to the specified type. -Value *CallInst::CreateMalloc(Instruction *InsertBefore, const Type *IntPtrTy, - const Type *AllocTy, Value *ArraySize, - const Twine &Name) { - return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, ArraySize, Name); +Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, + const Type *IntPtrTy, const Type *AllocTy, + Value *ArraySize, const Twine &Name) { + return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, + ArraySize, NULL, Name); } /// CreateMalloc - Generate the IR for a call to malloc: @@ -542,10 +551,12 @@ Value *CallInst::CreateMalloc(Instruction *InsertBefore, const Type *IntPtrTy, /// 3. Bitcast the result of the malloc call to the specified type. /// Note: This function does not add the bitcast to the basic block, that is the /// responsibility of the caller. -Value *CallInst::CreateMalloc(BasicBlock *InsertAtEnd, const Type *IntPtrTy, - const Type *AllocTy, Value *ArraySize, - const Twine &Name) { - return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, ArraySize, Name); +Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd, + const Type *IntPtrTy, const Type *AllocTy, + Value *ArraySize, Function* MallocF, + const Twine &Name) { + return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, + ArraySize, MallocF, Name); } //===----------------------------------------------------------------------===// @@ -837,7 +848,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { assert(!isa<BasicBlock>(Amt) && "Passed basic block into allocation size parameter! Use other ctor"); assert(Amt->getType() == Type::getInt32Ty(Context) && - "Malloc/Allocation array size is not a 32-bit integer!"); + "Allocation array size is not a 32-bit integer!"); } return Amt; } @@ -3073,18 +3084,6 @@ InsertValueInst *InsertValueInst::clone() const { return New; } -MallocInst *MallocInst::clone() const { - MallocInst *New = new MallocInst(getAllocatedType(), - (Value*)getOperand(0), - getAlignment()); - New->SubclassOptionalData = SubclassOptionalData; - if (hasMetadata()) { - LLVMContext &Context = getContext(); - Context.pImpl->TheMetadata.ValueIsCloned(this, New); - } - return New; -} - AllocaInst *AllocaInst::clone() const { AllocaInst *New = new AllocaInst(getAllocatedType(), (Value*)getOperand(0), diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp index 39ed7ed..3b4a1a3 100644 --- a/lib/VMCore/LLVMContext.cpp +++ b/lib/VMCore/LLVMContext.cpp @@ -45,32 +45,6 @@ GetElementPtrConstantExpr::GetElementPtrConstantExpr OperandList[i+1] = IdxList[i]; } -bool LLVMContext::RemoveDeadMetadata() { - std::vector<WeakVH> DeadMDNodes; - bool Changed = false; - while (1) { - - for (FoldingSet<MDNode>::iterator - I = pImpl->MDNodeSet.begin(), - E = pImpl->MDNodeSet.end(); I != E; ++I) { - MDNode *N = &(*I); - if (N->use_empty()) - DeadMDNodes.push_back(WeakVH(N)); - } - - if (DeadMDNodes.empty()) - return Changed; - - while (!DeadMDNodes.empty()) { - Value *V = DeadMDNodes.back(); DeadMDNodes.pop_back(); - if (const MDNode *N = dyn_cast_or_null<MDNode>(V)) - if (N->use_empty()) - delete N; - } - } - return Changed; -} - MetadataContext &LLVMContext::getMetadata() { return pImpl->TheMetadata; } diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 83888c3..84902d5 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -96,7 +96,6 @@ struct DenseMapAPFloatKeyInfo { class LLVMContextImpl { public: - sys::SmartRWMutex<true> ConstantsLock; typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*, DenseMapAPIntKeyInfo> IntMapTy; IntMapTy IntConstants; @@ -204,9 +203,6 @@ public: AggZeroConstants.freeConstants(); NullPtrConstants.freeConstants(); UndefValueConstants.freeConstants(); - for (FoldingSet<MDNode>::iterator I = MDNodeSet.begin(), - E = MDNodeSet.end(); I != E; ++I) - I->dropAllReferences(); for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end(); I != E; ++I) { if (I->second->use_empty()) @@ -217,6 +213,7 @@ public: if (I->second->use_empty()) delete I->second; } + MDNodeSet.clear(); } }; diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 110c5e3..ad9653f 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -16,73 +16,44 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instruction.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" #include "SymbolTableListTraitsImpl.h" using namespace llvm; //===----------------------------------------------------------------------===// -//MetadataBase implementation +// MetadataBase implementation. // -/// resizeOperands - Metadata keeps track of other metadata uses using -/// OperandList. Resize this list to hold anticipated number of metadata -/// operands. -void MetadataBase::resizeOperands(unsigned NumOps) { - unsigned e = getNumOperands(); - if (NumOps == 0) { - NumOps = e*2; - if (NumOps < 2) NumOps = 2; - } else if (NumOps > NumOperands) { - // No resize needed. - if (ReservedSpace >= NumOps) return; - } else if (NumOps == NumOperands) { - if (ReservedSpace == NumOps) return; - } else { - return; - } - - ReservedSpace = NumOps; - Use *OldOps = OperandList; - Use *NewOps = allocHungoffUses(NumOps); - std::copy(OldOps, OldOps + e, NewOps); - OperandList = NewOps; - if (OldOps) Use::zap(OldOps, OldOps + e, true); -} //===----------------------------------------------------------------------===// -//MDString implementation +// MDString implementation. // -MDString *MDString::get(LLVMContext &Context, const StringRef &Str) { +MDString *MDString::get(LLVMContext &Context, StringRef Str) { LLVMContextImpl *pImpl = Context.pImpl; - sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock); StringMapEntry<MDString *> &Entry = pImpl->MDStringCache.GetOrCreateValue(Str); MDString *&S = Entry.getValue(); - if (!S) S = new MDString(Context, Entry.getKeyData(), - Entry.getKeyLength()); - - return S; + if (S) return S; + + return S = + new MDString(Context, Entry.getKey()); } //===----------------------------------------------------------------------===// -//MDNode implementation +// MDNode implementation. // -MDNode::MDNode(LLVMContext &C, Value*const* Vals, unsigned NumVals) +MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals) : MetadataBase(Type::getMetadataTy(C), Value::MDNodeVal) { - NumOperands = 0; - resizeOperands(NumVals); - for (unsigned i = 0; i != NumVals; ++i) { - // Only record metadata uses. - if (MetadataBase *MB = dyn_cast_or_null<MetadataBase>(Vals[i])) - OperandList[NumOperands++] = MB; - else if(Vals[i] && - Vals[i]->getType()->getTypeID() == Type::MetadataTyID) - OperandList[NumOperands++] = Vals[i]; - Node.push_back(ElementVH(Vals[i], this)); - } + NodeSize = NumVals; + Node = new ElementVH[NodeSize]; + ElementVH *Ptr = Node; + for (unsigned i = 0; i != NumVals; ++i) + *Ptr++ = ElementVH(Vals[i], this); } void MDNode::Profile(FoldingSetNodeID &ID) const { - for (const_elem_iterator I = elem_begin(), E = elem_end(); I != E; ++I) - ID.AddPointer(*I); + for (unsigned i = 0, e = getNumElements(); i != e; ++i) + ID.AddPointer(getElement(i)); } MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { @@ -91,37 +62,31 @@ MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { for (unsigned i = 0; i != NumVals; ++i) ID.AddPointer(Vals[i]); - pImpl->ConstantsLock.reader_acquire(); void *InsertPoint; - MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); - pImpl->ConstantsLock.reader_release(); + MDNode *N; + { + N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); + } + if (N) return N; + N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); if (!N) { - sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock); - N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); - if (!N) { - // InsertPoint will have been set by the FindNodeOrInsertPos call. - N = new MDNode(Context, Vals, NumVals); - pImpl->MDNodeSet.InsertNode(N, InsertPoint); - } + // InsertPoint will have been set by the FindNodeOrInsertPos call. + N = new MDNode(Context, Vals, NumVals); + pImpl->MDNodeSet.InsertNode(N, InsertPoint); } return N; } -/// dropAllReferences - Remove all uses and clear node vector. -void MDNode::dropAllReferences() { - User::dropAllReferences(); - Node.clear(); -} - +/// ~MDNode - Destroy MDNode. MDNode::~MDNode() { { LLVMContextImpl *pImpl = getType()->getContext().pImpl; - sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock); pImpl->MDNodeSet.RemoveNode(this); } - dropAllReferences(); + delete [] Node; + Node = NULL; } // Replace value from this node's element list. @@ -136,9 +101,8 @@ void MDNode::replaceElement(Value *From, Value *To) { // From in this MDNode's element list. SmallVector<unsigned, 4> Indexes; unsigned Index = 0; - for (SmallVector<ElementVH, 4>::iterator I = Node.begin(), - E = Node.end(); I != E; ++I, ++Index) { - Value *V = *I; + for (unsigned i = 0, e = getNumElements(); i != e; ++i, ++Index) { + Value *V = getElement(i); if (V && V == From) Indexes.push_back(Index); } @@ -147,31 +111,7 @@ void MDNode::replaceElement(Value *From, Value *To) { return; // Remove "this" from the context map. - { - sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock); - pImpl->MDNodeSet.RemoveNode(this); - } - - // MDNode only lists metadata elements in operand list, because MDNode - // used by MDNode is considered a valid use. However on the side, MDNode - // using a non-metadata value is not considered a "use" of non-metadata - // value. - SmallVector<unsigned, 4> OpIndexes; - unsigned OpIndex = 0; - for (User::op_iterator OI = op_begin(), OE = op_end(); - OI != OE; ++OI, OpIndex++) { - if (*OI == From) - OpIndexes.push_back(OpIndex); - } - if (MetadataBase *MDTo = dyn_cast_or_null<MetadataBase>(To)) { - for (SmallVector<unsigned, 4>::iterator OI = OpIndexes.begin(), - OE = OpIndexes.end(); OI != OE; ++OI) - setOperand(*OI, MDTo); - } else { - for (SmallVector<unsigned, 4>::iterator OI = OpIndexes.begin(), - OE = OpIndexes.end(); OI != OE; ++OI) - setOperand(*OI, 0); - } + pImpl->MDNodeSet.RemoveNode(this); // Replace From element(s) in place. for (SmallVector<unsigned, 4>::iterator I = Indexes.begin(), E = Indexes.end(); @@ -186,10 +126,8 @@ void MDNode::replaceElement(Value *From, Value *To) { // node with updated "this" node. FoldingSetNodeID ID; Profile(ID); - pImpl->ConstantsLock.reader_acquire(); void *InsertPoint; MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); - pImpl->ConstantsLock.reader_release(); if (N) { N->replaceAllUsesWith(this); @@ -197,39 +135,32 @@ void MDNode::replaceElement(Value *From, Value *To) { N = 0; } - { - sys::SmartScopedWriter<true> Writer(pImpl->ConstantsLock); - N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); - if (!N) { - // InsertPoint will have been set by the FindNodeOrInsertPos call. - N = this; - pImpl->MDNodeSet.InsertNode(N, InsertPoint); - } + N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); + if (!N) { + // InsertPoint will have been set by the FindNodeOrInsertPos call. + N = this; + pImpl->MDNodeSet.InsertNode(N, InsertPoint); } } //===----------------------------------------------------------------------===// -//NamedMDNode implementation +// NamedMDNode implementation. // NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N, - MetadataBase*const* MDs, + MetadataBase *const *MDs, unsigned NumMDs, Module *ParentModule) : MetadataBase(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) { setName(N); - NumOperands = 0; - resizeOperands(NumMDs); - for (unsigned i = 0; i != NumMDs; ++i) { - if (MDs[i]) - OperandList[NumOperands++] = MDs[i]; - Node.push_back(WeakMetadataVH(MDs[i])); - } + for (unsigned i = 0; i != NumMDs; ++i) + Node.push_back(TrackingVH<MetadataBase>(MDs[i])); + if (ParentModule) ParentModule->getNamedMDList().push_back(this); } NamedMDNode *NamedMDNode::Create(const NamedMDNode *NMD, Module *M) { - assert (NMD && "Invalid source NamedMDNode!"); + assert(NMD && "Invalid source NamedMDNode!"); SmallVector<MetadataBase *, 4> Elems; for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) Elems.push_back(NMD->getElement(i)); @@ -245,7 +176,6 @@ void NamedMDNode::eraseFromParent() { /// dropAllReferences - Remove all uses and clear node vector. void NamedMDNode::dropAllReferences() { - User::dropAllReferences(); Node.clear(); } @@ -254,65 +184,100 @@ NamedMDNode::~NamedMDNode() { } //===----------------------------------------------------------------------===// -//Metadata implementation +// MetadataContextImpl implementation. // +namespace llvm { +class MetadataContextImpl { +public: + typedef std::pair<unsigned, TrackingVH<MDNode> > MDPairTy; + typedef SmallVector<MDPairTy, 2> MDMapTy; + typedef DenseMap<const Instruction *, MDMapTy> MDStoreTy; + friend class BitcodeReader; +private: + + /// MetadataStore - Collection of metadata used in this context. + MDStoreTy MetadataStore; + + /// MDHandlerNames - Map to hold metadata handler names. + StringMap<unsigned> MDHandlerNames; + +public: + /// registerMDKind - Register a new metadata kind and return its ID. + /// A metadata kind can be registered only once. + unsigned registerMDKind(StringRef Name); + + /// getMDKind - Return metadata kind. If the requested metadata kind + /// is not registered then return 0. + unsigned getMDKind(StringRef Name) const; + + /// getMD - Get the metadata of given kind attached to an Instruction. + /// If the metadata is not found then return 0. + MDNode *getMD(unsigned Kind, const Instruction *Inst); + + /// getMDs - Get the metadata attached to an Instruction. + void getMDs(const Instruction *Inst, SmallVectorImpl<MDPairTy> &MDs) const; + + /// addMD - Attach the metadata of given kind to an Instruction. + void addMD(unsigned Kind, MDNode *Node, Instruction *Inst); + + /// removeMD - Remove metadata of given kind attached with an instuction. + void removeMD(unsigned Kind, Instruction *Inst); + + /// removeAllMetadata - Remove all metadata attached with an instruction. + void removeAllMetadata(Instruction *Inst); + + /// copyMD - If metadata is attached with Instruction In1 then attach + /// the same metadata to In2. + void copyMD(Instruction *In1, Instruction *In2); + + /// getHandlerNames - Populate client supplied smallvector using custome + /// metadata name and ID. + void getHandlerNames(SmallVectorImpl<std::pair<unsigned, StringRef> >&) const; + + /// ValueIsDeleted - This handler is used to update metadata store + /// when a value is deleted. + void ValueIsDeleted(const Value *) {} + void ValueIsDeleted(Instruction *Inst) { + removeAllMetadata(Inst); + } + void ValueIsRAUWd(Value *V1, Value *V2); -/// RegisterMDKind - Register a new metadata kind and return its ID. -/// A metadata kind can be registered only once. -unsigned MetadataContext::RegisterMDKind(const char *Name) { - assert (validName(Name) && "Invalid custome metadata name!"); - unsigned Count = MDHandlerNames.size(); - assert(MDHandlerNames.find(Name) == MDHandlerNames.end() - && "Already registered MDKind!"); - MDHandlerNames[Name] = Count + 1; - return Count + 1; + /// ValueIsCloned - This handler is used to update metadata store + /// when In1 is cloned to create In2. + void ValueIsCloned(const Instruction *In1, Instruction *In2); +}; } -/// validName - Return true if Name is a valid custom metadata handler name. -bool MetadataContext::validName(const char *Name) { - if (!Name) - return false; - - if (!isalpha(*Name)) - return false; - - unsigned Length = strlen(Name); - unsigned Count = 1; - ++Name; - while (Name && - (isalnum(*Name) || *Name == '_' || *Name == '-' || *Name == '.')) { - ++Name; - ++Count; - } - if (Length != Count) - return false; - return true; +/// registerMDKind - Register a new metadata kind and return its ID. +/// A metadata kind can be registered only once. +unsigned MetadataContextImpl::registerMDKind(StringRef Name) { + unsigned Count = MDHandlerNames.size(); + assert(MDHandlerNames.count(Name) == 0 && "Already registered MDKind!"); + return MDHandlerNames[Name] = Count + 1; } /// getMDKind - Return metadata kind. If the requested metadata kind /// is not registered then return 0. -unsigned MetadataContext::getMDKind(const char *Name) { - assert (validName(Name) && "Invalid custome metadata name!"); - StringMap<unsigned>::iterator I = MDHandlerNames.find(Name); +unsigned MetadataContextImpl::getMDKind(StringRef Name) const { + StringMap<unsigned>::const_iterator I = MDHandlerNames.find(Name); if (I == MDHandlerNames.end()) return 0; return I->getValue(); } -/// addMD - Attach the metadata of given kind with an Instruction. -void MetadataContext::addMD(unsigned MDKind, MDNode *Node, Instruction *Inst) { - assert (Node && "Unable to add custome metadata"); +/// addMD - Attach the metadata of given kind to an Instruction. +void MetadataContextImpl::addMD(unsigned MDKind, MDNode *Node, + Instruction *Inst) { + assert(Node && "Invalid null MDNode"); Inst->HasMetadata = true; - MDStoreTy::iterator I = MetadataStore.find(Inst); - if (I == MetadataStore.end()) { - MDMapTy Info; + MDMapTy &Info = MetadataStore[Inst]; + if (Info.empty()) { Info.push_back(std::make_pair(MDKind, Node)); MetadataStore.insert(std::make_pair(Inst, Info)); return; } - MDMapTy &Info = I->second; // If there is an entry for this MDKind then replace it. for (unsigned i = 0, e = Info.size(); i != e; ++i) { MDPairTy &P = Info[i]; @@ -324,11 +289,10 @@ void MetadataContext::addMD(unsigned MDKind, MDNode *Node, Instruction *Inst) { // Otherwise add a new entry. Info.push_back(std::make_pair(MDKind, Node)); - return; } /// removeMD - Remove metadata of given kind attached with an instuction. -void MetadataContext::removeMD(unsigned Kind, Instruction *Inst) { +void MetadataContextImpl::removeMD(unsigned Kind, Instruction *Inst) { MDStoreTy::iterator I = MetadataStore.find(Inst); if (I == MetadataStore.end()) return; @@ -341,87 +305,80 @@ void MetadataContext::removeMD(unsigned Kind, Instruction *Inst) { return; } } - - return; } -/// removeMDs - Remove all metadata attached with an instruction. -void MetadataContext::removeMDs(const Instruction *Inst) { - // Find Metadata handles for this instruction. - MDStoreTy::iterator I = MetadataStore.find(Inst); - assert (I != MetadataStore.end() && "Invalid custom metadata info!"); - MDMapTy &Info = I->second; - - // FIXME : Give all metadata handlers a chance to adjust. - - // Remove the entries for this instruction. - Info.clear(); - MetadataStore.erase(I); +/// removeAllMetadata - Remove all metadata attached with an instruction. +void MetadataContextImpl::removeAllMetadata(Instruction *Inst) { + MetadataStore.erase(Inst); + Inst->HasMetadata = false; } /// copyMD - If metadata is attached with Instruction In1 then attach /// the same metadata to In2. -void MetadataContext::copyMD(Instruction *In1, Instruction *In2) { - assert (In1 && In2 && "Invalid instruction!"); - MDStoreTy::iterator I = MetadataStore.find(In1); - if (I == MetadataStore.end()) +void MetadataContextImpl::copyMD(Instruction *In1, Instruction *In2) { + assert(In1 && In2 && "Invalid instruction!"); + MDMapTy &In1Info = MetadataStore[In1]; + if (In1Info.empty()) return; - MDMapTy &In1Info = I->second; - MDMapTy In2Info; for (MDMapTy::iterator I = In1Info.begin(), E = In1Info.end(); I != E; ++I) - if (MDNode *MD = dyn_cast_or_null<MDNode>(I->second)) - addMD(I->first, MD, In2); + addMD(I->first, I->second, In2); } -/// getMD - Get the metadata of given kind attached with an Instruction. +/// getMD - Get the metadata of given kind attached to an Instruction. /// If the metadata is not found then return 0. -MDNode *MetadataContext::getMD(unsigned MDKind, const Instruction *Inst) { - MDStoreTy::iterator I = MetadataStore.find(Inst); - if (I == MetadataStore.end()) +MDNode *MetadataContextImpl::getMD(unsigned MDKind, const Instruction *Inst) { + MDMapTy &Info = MetadataStore[Inst]; + if (Info.empty()) return NULL; - - MDMapTy &Info = I->second; + for (MDMapTy::iterator I = Info.begin(), E = Info.end(); I != E; ++I) if (I->first == MDKind) - return dyn_cast_or_null<MDNode>(I->second); + return I->second; return NULL; } -/// getMDs - Get the metadata attached with an Instruction. -const MetadataContext::MDMapTy *MetadataContext::getMDs(const Instruction *Inst) { +/// getMDs - Get the metadata attached to an Instruction. +void MetadataContextImpl:: +getMDs(const Instruction *Inst, SmallVectorImpl<MDPairTy> &MDs) const { MDStoreTy::iterator I = MetadataStore.find(Inst); if (I == MetadataStore.end()) - return NULL; - - return &(I->second); + return; + for (MDMapTy::iterator MI = I->second.begin(), ME = I->second.end(); + MI != ME; ++MI) + MDs.push_back(std::make_pair(MI->first, MI->second)); + std::sort(MDs.begin(), MDs.end()); } -/// getHandlerNames - Get handler names. This is used by bitcode -/// writer. -const StringMap<unsigned> *MetadataContext::getHandlerNames() { - return &MDHandlerNames; +/// getHandlerNames - Populate client supplied smallvector using custome +/// metadata name and ID. +void MetadataContextImpl:: +getHandlerNames(SmallVectorImpl<std::pair<unsigned, StringRef> >&Names) const { + for (StringMap<unsigned>::const_iterator I = MDHandlerNames.begin(), + E = MDHandlerNames.end(); I != E; ++I) + Names.push_back(std::make_pair(I->second, I->first())); + std::sort(Names.begin(), Names.end()); } /// ValueIsCloned - This handler is used to update metadata store /// when In1 is cloned to create In2. -void MetadataContext::ValueIsCloned(const Instruction *In1, Instruction *In2) { +void MetadataContextImpl::ValueIsCloned(const Instruction *In1, + Instruction *In2) { // Find Metadata handles for In1. MDStoreTy::iterator I = MetadataStore.find(In1); - assert (I != MetadataStore.end() && "Invalid custom metadata info!"); + assert(I != MetadataStore.end() && "Invalid custom metadata info!"); // FIXME : Give all metadata handlers a chance to adjust. MDMapTy &In1Info = I->second; MDMapTy In2Info; for (MDMapTy::iterator I = In1Info.begin(), E = In1Info.end(); I != E; ++I) - if (MDNode *MD = dyn_cast_or_null<MDNode>(I->second)) - addMD(I->first, MD, In2); + addMD(I->first, I->second, In2); } /// ValueIsRAUWd - This handler is used when V1's all uses are replaced by /// V2. -void MetadataContext::ValueIsRAUWd(Value *V1, Value *V2) { +void MetadataContextImpl::ValueIsRAUWd(Value *V1, Value *V2) { Instruction *I1 = dyn_cast<Instruction>(V1); Instruction *I2 = dyn_cast<Instruction>(V2); if (!I1 || !I2) @@ -431,3 +388,94 @@ void MetadataContext::ValueIsRAUWd(Value *V1, Value *V2) { ValueIsCloned(I1, I2); } +//===----------------------------------------------------------------------===// +// MetadataContext implementation. +// +MetadataContext::MetadataContext() + : pImpl(new MetadataContextImpl()) { } +MetadataContext::~MetadataContext() { delete pImpl; } + +/// isValidName - Return true if Name is a valid custom metadata handler name. +bool MetadataContext::isValidName(StringRef MDName) { + if (MDName.empty()) + return false; + + if (!isalpha(MDName[0])) + return false; + + for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E; + ++I) { + if (!isalnum(*I) && *I != '_' && *I != '-' && *I != '.') + return false; + } + return true; +} + +/// registerMDKind - Register a new metadata kind and return its ID. +/// A metadata kind can be registered only once. +unsigned MetadataContext::registerMDKind(StringRef Name) { + assert(isValidName(Name) && "Invalid custome metadata name!"); + return pImpl->registerMDKind(Name); +} + +/// getMDKind - Return metadata kind. If the requested metadata kind +/// is not registered then return 0. +unsigned MetadataContext::getMDKind(StringRef Name) const { + return pImpl->getMDKind(Name); +} + +/// getMD - Get the metadata of given kind attached to an Instruction. +/// If the metadata is not found then return 0. +MDNode *MetadataContext::getMD(unsigned Kind, const Instruction *Inst) { + return pImpl->getMD(Kind, Inst); +} + +/// getMDs - Get the metadata attached to an Instruction. +void MetadataContext:: +getMDs(const Instruction *Inst, + SmallVectorImpl<std::pair<unsigned, TrackingVH<MDNode> > > &MDs) const { + return pImpl->getMDs(Inst, MDs); +} + +/// addMD - Attach the metadata of given kind to an Instruction. +void MetadataContext::addMD(unsigned Kind, MDNode *Node, Instruction *Inst) { + pImpl->addMD(Kind, Node, Inst); +} + +/// removeMD - Remove metadata of given kind attached with an instuction. +void MetadataContext::removeMD(unsigned Kind, Instruction *Inst) { + pImpl->removeMD(Kind, Inst); +} + +/// removeAllMetadata - Remove all metadata attached with an instruction. +void MetadataContext::removeAllMetadata(Instruction *Inst) { + pImpl->removeAllMetadata(Inst); +} + +/// copyMD - If metadata is attached with Instruction In1 then attach +/// the same metadata to In2. +void MetadataContext::copyMD(Instruction *In1, Instruction *In2) { + pImpl->copyMD(In1, In2); +} + +/// getHandlerNames - Populate client supplied smallvector using custome +/// metadata name and ID. +void MetadataContext:: +getHandlerNames(SmallVectorImpl<std::pair<unsigned, StringRef> >&N) const { + pImpl->getHandlerNames(N); +} + +/// ValueIsDeleted - This handler is used to update metadata store +/// when a value is deleted. +void MetadataContext::ValueIsDeleted(Instruction *Inst) { + pImpl->ValueIsDeleted(Inst); +} +void MetadataContext::ValueIsRAUWd(Value *V1, Value *V2) { + pImpl->ValueIsRAUWd(V1, V2); +} + +/// ValueIsCloned - This handler is used to update metadata store +/// when In1 is cloned to create In2. +void MetadataContext::ValueIsCloned(const Instruction *In1, Instruction *In2) { + pImpl->ValueIsCloned(In1, In2); +} diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index a2831d3..a17eed8 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -18,6 +18,7 @@ #include "llvm/Module.h" #include "llvm/ModuleProvider.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Atomic.h" diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index ba72af6..35ec9be 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/LeakDetector.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/System/RWMutex.h" #include "llvm/System/Threading.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 75ea4c3..3bfd47c 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -339,10 +339,10 @@ namespace { void WriteValue(const Value *V) { if (!V) return; if (isa<Instruction>(V)) { - MessagesStr << *V; + MessagesStr << *V << '\n'; } else { WriteAsOperand(MessagesStr, V, true, Mod); - MessagesStr << "\n"; + MessagesStr << '\n'; } } |