diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2010-04-02 08:54:30 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2010-04-02 08:54:30 +0000 |
commit | 20e856b2a58d12231aa42d5d13888b15ac03e5a4 (patch) | |
tree | cf5763d092b81cecc168fa28032247ee495d06e2 /lib | |
parent | 2f2afc1aae898651e26987a5c71f3febb19bca98 (diff) | |
download | FreeBSD-src-20e856b2a58d12231aa42d5d13888b15ac03e5a4.zip FreeBSD-src-20e856b2a58d12231aa42d5d13888b15ac03e5a4.tar.gz |
Update LLVM to r100181.
Diffstat (limited to 'lib')
198 files changed, 7087 insertions, 4629 deletions
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 8767c18..0478258 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -49,7 +49,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, SmallSet<Use*, Threshold> Visited; int Count = 0; - for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end(); + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { // If there are lots of uses, conservatively say that the value // is captured to avoid taking too much compile time. diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index fda69ac..f12552d 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -96,9 +96,8 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { if (DbgNode == 0) return DIDescriptor(); - if (Elt < DbgNode->getNumOperands() && DbgNode->getOperand(Elt)) - return DIDescriptor(dyn_cast<MDNode>(DbgNode->getOperand(Elt))); - + if (Elt < DbgNode->getNumOperands()) + return DIDescriptor(dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt))); return DIDescriptor(); } diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 0e333d1..0f39f44 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -17,12 +17,13 @@ #define DEBUG_TYPE "cgscc-passmgr" #include "llvm/CallGraphSCCPass.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Function.h" +#include "llvm/PassManagers.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/ADT/SCCIterator.h" -#include "llvm/PassManagers.h" -#include "llvm/Function.h" #include "llvm/Support/Debug.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -102,9 +103,10 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, CallGraphUpToDate = true; } - Timer *T = StartPassTimer(CGSP); - 
Changed = CGSP->runOnSCC(CurSCC); - StopPassTimer(CGSP, T); + { + TimeRegion PassTimer(getPassTimer(CGSP)); + Changed = CGSP->runOnSCC(CurSCC); + } // After the CGSCCPass is done, when assertions are enabled, use // RefreshCallGraph to verify that the callgraph was correctly updated. @@ -125,9 +127,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { if (Function *F = CurSCC[i]->getFunction()) { dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); - Timer *T = StartPassTimer(FPP); + TimeRegion PassTimer(getPassTimer(FPP)); Changed |= FPP->runOnFunction(*F); - StopPassTimer(FPP, T); } } diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 7b43089..b14afa3 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -257,7 +257,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { // Make sure that this is just the function being called, not that it is // passing into the function. - for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i) + for (unsigned i = 0, e = II->getNumOperands() - 3; i != e; ++i) if (II->getOperand(i) == V) return true; } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { if (CE->getOpcode() == Instruction::GetElementPtr || diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 5b8b534..c599e90 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -255,9 +255,11 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Caller = TheCall->getParent()->getParent(); // Don't inline functions which can be redefined at link-time to mean - // something else. Don't inline functions marked noinline. + // something else. Don't inline functions marked noinline or call sites + // marked noinline. 
if (Callee->mayBeOverridden() || - Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee)) + Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) || + CS.isNoInline()) return llvm::InlineCost::getNever(); // InlineCost - This value measures how good of an inline candidate this call diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp index 1b91d93..23964ff 100644 --- a/lib/Analysis/LiveValues.cpp +++ b/lib/Analysis/LiveValues.cpp @@ -125,7 +125,7 @@ LiveValues::Memo &LiveValues::compute(const Value *V) { bool LiveOutOfDefBB = false; // Examine each use of the value. - for (Value::use_const_iterator I = V->use_begin(), E = V->use_end(); + for (Value::const_use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { const User *U = *I; const BasicBlock *UseBB = cast<Instruction>(U)->getParent(); diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 2d613f6..e2d2c2b 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" +#include "llvm/Support/Timer.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -221,22 +222,22 @@ bool LPPassManager::runOnFunction(Function &F) { LoopPass *P = (LoopPass*)getContainedPass(Index); dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, - CurrentLoop->getHeader()->getNameStr()); + CurrentLoop->getHeader()->getName()); dumpRequiredSet(P); initializeAnalysisImpl(P); { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); - Timer *T = StartPassTimer(P); + TimeRegion PassTimer(getPassTimer(P)); + Changed |= P->runOnLoop(CurrentLoop, *this); - StopPassTimer(P, T); } if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, skipThisLoop ? 
"<deleted>" : - CurrentLoop->getHeader()->getNameStr()); + CurrentLoop->getHeader()->getName()); dumpPreservedSet(P); if (!skipThisLoop) { @@ -245,9 +246,10 @@ bool LPPassManager::runOnFunction(Function &F) { // is a function pass and it's really expensive to verify every // loop in the function every time. That level of checking can be // enabled with the -verify-loop-info option. - Timer *T = StartPassTimer(LI); - CurrentLoop->verifyLoop(); - StopPassTimer(LI, T); + { + TimeRegion PassTimer(getPassTimer(LI)); + CurrentLoop->verifyLoop(); + } // Then call the regular verifyAnalysis functions. verifyPreservedAnalysis(P); @@ -257,7 +259,7 @@ bool LPPassManager::runOnFunction(Function &F) { recordAvailableAnalysis(P); removeDeadPasses(P, skipThisLoop ? "<deleted>" : - CurrentLoop->getHeader()->getNameStr(), + CurrentLoop->getHeader()->getName(), ON_LOOP_MSG); if (skipThisLoop) diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 297b588..89f9743 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -139,7 +139,7 @@ const PointerType *llvm::getMallocType(const CallInst *CI) { unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. 
- for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end(); + for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; ) if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) { MallocType = cast<PointerType>(BCI->getDestTy()); diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index bce6b31..da4ce47 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -398,7 +398,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { const BasicBlock *BB = &(*FI); BlockInformation[&F][BB] = 0; - pred_const_iterator predi = pred_begin(BB), prede = pred_end(BB); + const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB); if (predi == prede) { Edge e = getEdge(0,BB); setEdgeWeight(e,0); diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 85531be..662576e 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -67,7 +67,7 @@ ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) { double Count = MissingValue; - pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB); + const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); // Are there zero predecessors of this block? 
if (PI == PE) { @@ -508,7 +508,7 @@ bool ProfileInfoT<Function,BasicBlock>:: // have no value double incount = 0; SmallSet<const BasicBlock*,8> pred_visited; - pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); if (bbi==bbe) { Edge e = getEdge(0,BB); incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); @@ -582,7 +582,7 @@ bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *B double inWeight = 0; std::set<Edge> inMissing; std::set<const BasicBlock*> ProcessedPreds; - pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); if (bbi == bbe) { readEdge(this,getEdge(0,BB),inWeight,inMissing); } @@ -639,7 +639,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { // FI != FE; ++FI) { // const BasicBlock* BB = &(*FI); // { -// pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); +// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); // if (NBB == End) { // setEdgeWeight(getEdge(0,BB),0); // } @@ -779,7 +779,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { // Calculate incoming flow. 
double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0; std::set<const BasicBlock *> Processed; - for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); NBB != End; ++NBB) { if (Processed.insert(*NBB).second) { Edge e = getEdge(*NBB, BB); @@ -869,7 +869,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { if (getEdgeWeight(e) == MissingValue) { double iw = 0; std::set<const BasicBlock *> Processed; - for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); NBB != End; ++NBB) { if (Processed.insert(*NBB).second) { Edge e = getEdge(*NBB, BB); @@ -893,7 +893,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { const BasicBlock *Dest; Path P; bool BackEdgeFound = false; - for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); NBB != End; ++NBB) { Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges); if (Dest == *NBB) { @@ -935,7 +935,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { // Calculate incoming flow. 
double iw = 0; std::set<const BasicBlock *> Processed; - for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); NBB != End; ++NBB) { if (Processed.insert(*NBB).second) { Edge e = getEdge(*NBB, BB); @@ -965,7 +965,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { while(FI != FE && !FoundPath) { const BasicBlock *BB = *FI; ++FI; - for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); NBB != End; ++NBB) { Edge e = getEdge(*NBB,BB); double w = getEdgeWeight(e); diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index ac9ed52..8ea4ecf 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -119,7 +119,7 @@ void LoaderPass::recurseBasicBlock(const BasicBlock *BB) { bbi != bbe; ++bbi) { recurseBasicBlock(*bbi); } - for (pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); bbi != bbe; ++bbi) { recurseBasicBlock(*bbi); } diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index a2ddc8e..5d87e14 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -96,8 +96,8 @@ namespace llvm { double inWeight = 0; int inCount = 0; std::set<const BType*> ProcessedPreds; - for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi ) { + for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi ) { if (ProcessedPreds.insert(*bbi).second) { typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB); double EdgeWeight = PI->getEdgeWeight(E); @@ -242,7 +242,7 @@ namespace llvm { // Read predecessors. 
std::set<const BType*> ProcessedPreds; - pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB); + const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB); // If there are none, check for (0,BB) edge. if (bpi == bpe) { DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 138cdc6..2e18cea 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1268,19 +1268,8 @@ Value *SCEVExpander::expand(const SCEV *S) { L = L->getParentLoop()) if (S->isLoopInvariant(L)) { if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) { + if (BasicBlock *Preheader = L->getLoopPreheader()) InsertPt = Preheader->getTerminator(); - BasicBlock::iterator IP = InsertPt; - // Back past any debug info instructions. Sometimes we inserted - // something earlier before debug info but after any real instructions. - // This should behave the same as if debug info was not present. - while (IP != Preheader->begin()) { - --IP; - if (!isa<DbgInfoIntrinsic>(IP)) - break; - InsertPt = IP; - } - } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 58fbbf4..a02601a 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -220,7 +220,7 @@ Archive::writeMember( } // Now that we have the data in memory, update the - // symbol table if its a bitcode file. + // symbol table if it's a bitcode file. 
if (CreateSymbolTable && member.isBitcode()) { std::vector<std::string> symbols; std::string FullMemberName = archPath.str() + "(" + member.getPath().str() diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index 3057992..70f1cfd 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -55,7 +55,7 @@ namespace llvm { typedef SMLoc LocTy; LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); } lltok::Kind getKind() const { return CurKind; } - const std::string getStrVal() const { return StrVal; } + const std::string &getStrVal() const { return StrVal; } const Type *getTyVal() const { return TyVal; } unsigned getUIntVal() const { return UIntVal; } const APSInt &getAPSIntVal() const { return APSIntVal; } diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 8083a07..cdad077 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -39,6 +39,27 @@ bool LLParser::Run() { /// ValidateEndOfModule - Do final validity and sanity checks at the end of the /// module. bool LLParser::ValidateEndOfModule() { + // Handle any instruction metadata forward references. + if (!ForwardRefInstMetadata.empty()) { + for (DenseMap<Instruction*, std::vector<MDRef> >::iterator + I = ForwardRefInstMetadata.begin(), E = ForwardRefInstMetadata.end(); + I != E; ++I) { + Instruction *Inst = I->first; + const std::vector<MDRef> &MDList = I->second; + + for (unsigned i = 0, e = MDList.size(); i != e; ++i) { + unsigned SlotNo = MDList[i].MDSlot; + + if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0) + return Error(MDList[i].Loc, "use of undefined metadata '!" + + utostr(SlotNo) + "'"); + Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]); + } + } + ForwardRefInstMetadata.clear(); + } + + // Update auto-upgraded malloc calls to "malloc". // FIXME: Remove in LLVM 3.0. if (MallocF) { @@ -472,18 +493,30 @@ bool LLParser::ParseMDString(MDString *&Result) { // MDNode: // ::= '!' 
MDNodeNumber +// +/// This version of ParseMDNodeID returns the slot number and null in the case +/// of a forward reference. +bool LLParser::ParseMDNodeID(MDNode *&Result, unsigned &SlotNo) { + // !{ ..., !42, ... } + if (ParseUInt32(SlotNo)) return true; + + // Check existing MDNode. + if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != 0) + Result = NumberedMetadata[SlotNo]; + else + Result = 0; + return false; +} + bool LLParser::ParseMDNodeID(MDNode *&Result) { // !{ ..., !42, ... } unsigned MID = 0; - if (ParseUInt32(MID)) return true; + if (ParseMDNodeID(Result, MID)) return true; - // Check existing MDNode. - if (MID < NumberedMetadata.size() && NumberedMetadata[MID] != 0) { - Result = NumberedMetadata[MID]; - return false; - } + // If not a forward reference, just return it now. + if (Result) return false; - // Create MDNode forward reference. + // Otherwise, create MDNode forward reference. // FIXME: This is not unique enough! std::string FwdRefName = "llvm.mdnode.fwdref." + utostr(MID); @@ -1078,9 +1111,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { /// ParseInstructionMetadata /// ::= !dbg !42 (',' !dbg !57)* -bool LLParser:: -ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned, - MDNode *> > &Result){ +bool LLParser::ParseInstructionMetadata(Instruction *Inst) { do { if (Lex.getKind() != lltok::MetadataVar) return TokError("expected metadata after comma"); @@ -1089,12 +1120,21 @@ ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned, Lex.Lex(); MDNode *Node; + unsigned NodeID; + SMLoc Loc = Lex.getLoc(); if (ParseToken(lltok::exclaim, "expected '!' here") || - ParseMDNodeID(Node)) + ParseMDNodeID(Node, NodeID)) return true; unsigned MDK = M->getMDKindID(Name.c_str()); - Result.push_back(std::make_pair(MDK, Node)); + if (Node) { + // If we got the node, add it to the instruction. 
+ Inst->setMetadata(MDK, Node); + } else { + MDRef R = { Loc, MDK, NodeID }; + // Otherwise, remember that this should be resolved later. + ForwardRefInstMetadata[Inst].push_back(R); + } // If this is the end of the list, we're done. } while (EatIfPresent(lltok::comma)); @@ -2896,22 +2936,17 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { // With a normal result, we check to see if the instruction is followed by // a comma and metadata. if (EatIfPresent(lltok::comma)) - if (ParseInstructionMetadata(MetadataOnInst)) + if (ParseInstructionMetadata(Inst)) return true; break; case InstExtraComma: // If the instruction parser ate an extra comma at the end of it, it // *must* be followed by metadata. - if (ParseInstructionMetadata(MetadataOnInst)) + if (ParseInstructionMetadata(Inst)) return true; break; } - // Set metadata attached with this instruction. - for (unsigned i = 0, e = MetadataOnInst.size(); i != e; ++i) - Inst->setMetadata(MetadataOnInst[i].first, MetadataOnInst[i].second); - MetadataOnInst.clear(); - BB->getInstList().push_back(Inst); // Set the name on the instruction. diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 9abe404..ae460bb 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -17,6 +17,7 @@ #include "LLLexer.h" #include "llvm/Module.h" #include "llvm/Type.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/ValueHandle.h" #include <map> @@ -76,6 +77,14 @@ namespace llvm { LLVMContext& Context; LLLexer Lex; Module *M; + + // Instruction metadata resolution. Each instruction can have a list of + // MDRef info associated with them. + struct MDRef { + SMLoc Loc; + unsigned MDKind, MDSlot; + }; + DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata; // Type resolution handling data structures. 
std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes; @@ -171,8 +180,7 @@ namespace llvm { bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); bool ParseOptionalStackAlignment(unsigned &Alignment); - bool ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned, - MDNode *> > &); + bool ParseInstructionMetadata(Instruction *Inst); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma); bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) { @@ -204,6 +212,7 @@ namespace llvm { bool ParseNamedMetadata(); bool ParseMDString(MDString *&Result); bool ParseMDNodeID(MDNode *&Result); + bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); // Type Parsing. bool ParseType(PATypeHolder &Result, bool AllowVoid = false); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index b9453c9..76d112e 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1527,12 +1527,16 @@ bool BitcodeReader::ParseModule() { bool BitcodeReader::ParseBitcodeInto(Module *M) { TheModule = 0; - if (Buffer->getBufferSize() & 3) - return Error("Bitcode stream should be a multiple of 4 bytes in length"); - unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + if (Buffer->getBufferSize() & 3) { + if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd)) + return Error("Invalid bitcode signature"); + else + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + } + // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. 
if (isBitcodeWrapper(BufPtr, BufEnd)) diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 3ab2726..1f69e16 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1090,11 +1090,11 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - Vals.push_back(VE.getValueID(I.getOperand(i+3))); // fixed param. + Vals.push_back(VE.getValueID(I.getOperand(i))); // fixed param. // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { - for (unsigned i = 3+FTy->getNumParams(), e = I.getNumOperands(); + for (unsigned i = FTy->getNumParams(), e = I.getNumOperands()-3; i != e; ++i) PushValueAndType(I.getOperand(i), InstID, Vals, VE); // vararg } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 1d4f7f7..3e71d18 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -62,7 +62,7 @@ AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm, TM(tm), MAI(tm.getMCAsmInfo()), TRI(tm.getRegisterInfo()), OutContext(Streamer.getContext()), OutStreamer(Streamer), - LastMI(0), LastFn(0), Counter(~0U), SetCounter(0), PrevDLT(NULL) { + LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { DW = 0; MMI = 0; VerboseAsm = Streamer.isVerboseAsm(); } @@ -922,8 +922,8 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, // Otherwise, emit with .set (aka assignment). 
MCSymbol *SetLabel = - OutContext.GetOrCreateTemporarySymbol(Twine(MAI->getPrivateGlobalPrefix()) + - "set" + Twine(SetCounter++)); + OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + "set" + Twine(SetCounter++)); OutStreamer.EmitAssignment(SetLabel, Diff); OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); } @@ -1337,25 +1337,12 @@ void AsmPrinter::processDebugLoc(const MachineInstr *MI, if (!MAI || !DW || !MAI->doesSupportDebugInformation() || !DW->ShouldEmitDwarfDebug()) return; - if (MI->getOpcode() == TargetOpcode::DBG_VALUE) - return; - DebugLoc DL = MI->getDebugLoc(); - if (DL.isUnknown()) - return; - DILocation CurDLT = MF->getDILocation(DL); - if (!CurDLT.getScope().Verify()) - return; - if (!BeforePrintingInsn) { + if (!BeforePrintingInsn) // After printing instruction DW->EndScope(MI); - } else if (CurDLT.getNode() != PrevDLT) { - MCSymbol *L = DW->RecordSourceLine(CurDLT.getLineNumber(), - CurDLT.getColumnNumber(), - CurDLT.getScope().getNode()); - DW->BeginScope(MI, L); - PrevDLT = CurDLT.getNode(); - } + else + DW->BeginScope(MI); } @@ -1612,7 +1599,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - return OutContext.GetOrCreateTemporarySymbol + return OutContext.GetOrCreateSymbol (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); } @@ -1625,7 +1612,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. 
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - return OutContext.GetOrCreateTemporarySymbol + return OutContext.GetOrCreateSymbol (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); } @@ -1639,9 +1626,7 @@ MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV, SmallString<60> NameStr; Mang->getNameWithPrefix(NameStr, GV, ForcePrivate); NameStr.append(Suffix.begin(), Suffix.end()); - if (!GV->hasPrivateLinkage() && !ForcePrivate) - return OutContext.GetOrCreateSymbol(NameStr.str()); - return OutContext.GetOrCreateTemporarySymbol(NameStr.str()); + return OutContext.GetOrCreateSymbol(NameStr.str()); } /// GetExternalSymbolSymbol - Return the MCSymbol for the specified diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index e97754e..e0e3ff7 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" @@ -114,10 +115,11 @@ DIE::~DIE() { /// addSiblingOffset - Add a sibling offset field to the front of the DIE. /// -void DIE::addSiblingOffset() { - DIEInteger *DI = new DIEInteger(0); +DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) { + DIEInteger *DI = new (A) DIEInteger(0); Values.insert(Values.begin(), DI); Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4); + return DI; } #ifndef NDEBUG @@ -277,31 +279,6 @@ void DIELabel::print(raw_ostream &O) { #endif //===----------------------------------------------------------------------===// -// DIESectionOffset Implementation -//===----------------------------------------------------------------------===// - -/// EmitValue - Emit delta value. 
-/// -void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const { - bool IsSmall = Form == dwarf::DW_FORM_data4; - D->EmitSectionOffset(Label, Section, IsSmall, IsEH); -} - -/// SizeOf - Determine size of delta value in bytes. -/// -unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const { - if (Form == dwarf::DW_FORM_data4) return 4; - return TD->getPointerSize(); -} - -#ifndef NDEBUG -void DIESectionOffset::print(raw_ostream &O) { - O << "Off: " << Label->getName() << "-" << Section->getName() - << "-" << IsEH; -} -#endif - -//===----------------------------------------------------------------------===// // DIEDelta Implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index c5909fa..8b27ed2 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -153,7 +153,7 @@ namespace llvm { unsigned getOffset() const { return Offset; } unsigned getSize() const { return Size; } const std::vector<DIE *> &getChildren() const { return Children; } - SmallVector<DIEValue*, 32> &getValues() { return Values; } + const SmallVector<DIEValue*, 32> &getValues() const { return Values; } DIE *getParent() const { return Parent; } void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } @@ -171,8 +171,10 @@ namespace llvm { unsigned getSiblingOffset() const { return Offset + Size; } /// addSiblingOffset - Add a sibling offset field to the front of the DIE. + /// The caller is responsible for deleting the return value at or after the + /// same time it destroys this DIE. /// - void addSiblingOffset(); + DIEValue *addSiblingOffset(BumpPtrAllocator &A); /// addChild - Add a child to the DIE. /// @@ -328,38 +330,6 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIESectionOffset - A section offset DIE. 
- /// - class DIESectionOffset : public DIEValue { - const MCSymbol *Label; - const MCSymbol *Section; - bool IsEH : 1; - public: - DIESectionOffset(const MCSymbol *Lab, const MCSymbol *Sec, - bool isEH = false) - : DIEValue(isSectionOffset), Label(Lab), Section(Sec), - IsEH(isEH) {} - - /// EmitValue - Emit section offset. - /// - virtual void EmitValue(DwarfPrinter *D, unsigned Form) const; - - /// SizeOf - Determine size of section offset value in bytes. - /// - virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; - - // Implement isa/cast/dyncast. - static bool classof(const DIESectionOffset *) { return true; } - static bool classof(const DIEValue *D) { - return D->getType() == isSectionOffset; - } - -#ifndef NDEBUG - virtual void print(raw_ostream &O); -#endif - }; - - //===--------------------------------------------------------------------===// /// DIEDelta - A simple label difference DIE. /// class DIEDelta : public DIEValue { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7153fe2..fb91d4f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -92,11 +92,11 @@ public: /// addGlobal - Add a new global entity to the compile unit. /// - void addGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; } + void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } /// addGlobalType - Add a new global type to the compile unit. /// - void addGlobalType(const std::string &Name, DIE *Die) { + void addGlobalType(StringRef Name, DIE *Die) { GlobalTypes[Name] = Die; } @@ -149,20 +149,26 @@ class DbgVariable { DIVariable Var; // Variable Descriptor. unsigned FrameIndex; // Variable frame index. const MachineInstr *DbgValueMInsn; // DBG_VALUE + // DbgValueLabel - DBG_VALUE is effective from this label. + MCSymbol *DbgValueLabel; DbgVariable *const AbstractVar; // Abstract variable for this variable. DIE *TheDIE; public: // AbsVar may be NULL. 
DbgVariable(DIVariable V, unsigned I, DbgVariable *AbsVar) - : Var(V), FrameIndex(I), DbgValueMInsn(0), AbstractVar(AbsVar), TheDIE(0) {} + : Var(V), FrameIndex(I), DbgValueMInsn(0), + DbgValueLabel(0), AbstractVar(AbsVar), TheDIE(0) {} DbgVariable(DIVariable V, const MachineInstr *MI, DbgVariable *AbsVar) - : Var(V), FrameIndex(0), DbgValueMInsn(MI), AbstractVar(AbsVar), TheDIE(0) + : Var(V), FrameIndex(0), DbgValueMInsn(MI), DbgValueLabel(0), + AbstractVar(AbsVar), TheDIE(0) {} // Accessors. DIVariable getVariable() const { return Var; } unsigned getFrameIndex() const { return FrameIndex; } const MachineInstr *getDbgValue() const { return DbgValueMInsn; } + MCSymbol *getDbgValueLabel() const { return DbgValueLabel; } + void setDbgValueLabel(MCSymbol *L) { DbgValueLabel = L; } DbgVariable *getAbstractVariable() const { return AbstractVar; } void setDIE(DIE *D) { TheDIE = D; } DIE *getDIE() const { return TheDIE; } @@ -224,14 +230,14 @@ public: void fixInstructionMarkers(DenseMap<const MachineInstr *, unsigned> &MIIndexMap) { - assert (getFirstInsn() && "First instruction is missing!"); + assert(getFirstInsn() && "First instruction is missing!"); // Use the end of last child scope as end of this scope. 
const SmallVector<DbgScope *, 4> &Scopes = getScopes(); const MachineInstr *LastInsn = getFirstInsn(); unsigned LIndex = 0; if (Scopes.empty()) { - assert (getLastInsn() && "Inner most scope does not have last insn!"); + assert(getLastInsn() && "Inner most scope does not have last insn!"); return; } for (SmallVector<DbgScope *, 4>::const_iterator SI = Scopes.begin(), @@ -295,15 +301,15 @@ DbgScope::~DbgScope() { DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T) : DwarfPrinter(OS, A, T), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(), - DIEValues(), SectionSourceLines(), didInitial(false), shouldEmit(false), - CurrentFnDbgScope(0), DebugTimer(0) { + DIEBlocks(), SectionSourceLines(), didInitial(false), shouldEmit(false), + CurrentFnDbgScope(0), PrevDILoc(0), DebugTimer(0) { NextStringPoolNumber = 0; if (TimePassesIsEnabled) DebugTimer = new Timer("Dwarf Debug Writer"); } DwarfDebug::~DwarfDebug() { - for (unsigned j = 0, M = DIEValues.size(); j < M; ++j) - delete DIEValues[j]; + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); delete DebugTimer; } @@ -343,8 +349,7 @@ void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug /// information entry. 
DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { - DIEEntry *Value = new DIEEntry(Entry); - DIEValues.push_back(Value); + DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); return Value; } @@ -353,8 +358,7 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = new DIEInteger(Integer); - DIEValues.push_back(Value); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); Die->addValue(Attribute, Form, Value); } @@ -363,8 +367,7 @@ void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer) { if (!Form) Form = DIEInteger::BestForm(true, Integer); - DIEValue *Value = new DIEInteger(Integer); - DIEValues.push_back(Value); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); Die->addValue(Attribute, Form, Value); } @@ -372,8 +375,7 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, /// keeps string reference. void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, StringRef String) { - DIEValue *Value = new DIEString(String); - DIEValues.push_back(Value); + DIEValue *Value = new (DIEValueAllocator) DIEString(String); Die->addValue(Attribute, Form, Value); } @@ -381,18 +383,7 @@ void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, /// void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form, const MCSymbol *Label) { - DIEValue *Value = new DIELabel(Label); - DIEValues.push_back(Value); - Die->addValue(Attribute, Form, Value); -} - -/// addSectionOffset - Add a section offset label attribute data and value. 
-/// -void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label,const MCSymbol *Section, - bool isEH) { - DIEValue *Value = new DIESectionOffset(Label, Section, isEH); - DIEValues.push_back(Value); + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); Die->addValue(Attribute, Form, Value); } @@ -400,8 +391,7 @@ void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form, /// void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new DIEDelta(Hi, Lo); - DIEValues.push_back(Value); + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); Die->addValue(Attribute, Form, Value); } @@ -410,7 +400,7 @@ void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block) { Block->ComputeSize(TD); - DIEValues.push_back(Block); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. Die->addValue(Attribute, Block->BestForm(), Block); } @@ -457,8 +447,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) { unsigned Line = SP->getLineNumber(); if (!SP->getContext().Verify()) return; - unsigned FileID = GetOrCreateSourceID(SP->getContext().getDirectory(), - SP->getContext().getFilename()); + unsigned FileID = GetOrCreateSourceID(SP->getDirectory(), + SP->getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -564,7 +554,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, // Decode the original location, and use that as the start of the byref // variable's location. 
unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); if (Location.isReg()) { if (Reg < 32) { @@ -696,15 +686,15 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, } // Get the offsets for the forwarding field and the variable field. - unsigned int forwardingFieldOffset = + unsigned forwardingFieldOffset = DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3; - unsigned int varFieldOffset = + unsigned varFieldOffset = DIDerivedType(varField.getNode()).getOffsetInBits() >> 3; // Decode the original location, and use that as the start of the byref // variable's location. unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); if (Location.isReg()) { if (Reg < 32) @@ -759,7 +749,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, const MachineLocation &Location) { unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); if (Location.isReg()) { if (Reg < 32) { @@ -1106,7 +1096,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { addSourceLine(MemberDie, &DT); - DIEBlock *MemLocationDie = new DIEBlock(); + DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); uint64_t Size = DT.getSizeInBits(); @@ -1142,7 +1132,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { // expression to extract appropriate offset from vtable. 
// BaseAddr = ObAddr + *((*ObAddr) - Offset) - DIEBlock *VBaseLocationDie = new DIEBlock(); + DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); @@ -1208,7 +1198,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { unsigned VK = SP.getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); - DIEBlock *Block = new DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); @@ -1244,13 +1234,13 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { return SPDie; } -/// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction. -/// Initialize scope and update scope hierarchy. +/// getUpdatedDbgScope - Find DbgScope assicated with the instruction. +/// Update scope hierarchy. Create abstract scope if required. 
DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, - MDNode *InlinedAt) { - assert (N && "Invalid Scope encoding!"); - assert (MI && "Missing machine instruction!"); - bool GetConcreteScope = (MI && InlinedAt); + MDNode *InlinedAt) { + assert(N && "Invalid Scope encoding!"); + assert(MI && "Missing machine instruction!"); + bool isAConcreteScope = InlinedAt != 0; DbgScope *NScope = NULL; @@ -1258,17 +1248,17 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, NScope = DbgScopeMap.lookup(InlinedAt); else NScope = DbgScopeMap.lookup(N); - assert (NScope && "Unable to find working scope!"); + assert(NScope && "Unable to find working scope!"); if (NScope->getFirstInsn()) return NScope; DbgScope *Parent = NULL; - if (GetConcreteScope) { + if (isAConcreteScope) { DILocation IL(InlinedAt); Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, IL.getOrigLocation().getNode()); - assert (Parent && "Unable to find Parent scope!"); + assert(Parent && "Unable to find Parent scope!"); NScope->setParent(Parent); Parent->addScope(NScope); } else if (DIDescriptor(N).isLexicalBlock()) { @@ -1286,7 +1276,7 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, CurrentFnDbgScope = NScope; } - if (GetConcreteScope) { + if (isAConcreteScope) { ConcreteScopes[InlinedAt] = NScope; getOrCreateAbstractScope(N); } @@ -1295,7 +1285,7 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, } DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { - assert (N && "Invalid Scope encoding!"); + assert(N && "Invalid Scope encoding!"); DbgScope *AScope = AbstractScopes.lookup(N); if (AScope) @@ -1377,7 +1367,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { MCSymbol *Start = Scope->getStartLabel(); MCSymbol *End = Scope->getEndLabel(); - if (Start == 0) return 0; + if (Start == 0 || End == 0) return 0; 
assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); @@ -1400,7 +1390,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { MCSymbol *StartLabel = Scope->getStartLabel(); MCSymbol *EndLabel = Scope->getEndLabel(); - if (StartLabel == 0) return 0; + if (StartLabel == 0 || EndLabel == 0) return 0; assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); @@ -1413,7 +1403,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { DISubprogram InlinedSP = getDISubprogram(DS.getNode()); DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode()); - assert (OriginDIE && "Unable to find Origin DIE!"); + assert(OriginDIE && "Unable to find Origin DIE!"); addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, OriginDIE); @@ -1477,9 +1467,9 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { DISubprogram InlinedSP = getDISubprogram(DS.getNode()); DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode()); (void) OriginSPDIE; - assert (OriginSPDIE && "Unable to find Origin DIE for the SP!"); + assert(OriginSPDIE && "Unable to find Origin DIE for the SP!"); DIE *AbsDIE = DV->getAbstractVariable()->getDIE(); - assert (AbsDIE && "Unable to find Origin DIE for the Variable!"); + assert(AbsDIE && "Unable to find Origin DIE for the Variable!"); addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsDIE); } @@ -1508,12 +1498,18 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { MachineLocation Location; Location.set(DbgValueInsn->getOperand(0).getReg()); addAddress(VariableDie, dwarf::DW_AT_location, Location); + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, + VS); } else if (DbgValueInsn->getOperand(0).getType() == 
MachineOperand::MO_Immediate) { - DIEBlock *Block = new DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); unsigned Imm = DbgValueInsn->getOperand(0).getImm(); addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, + VS); } else { //FIXME : Handle other operand types. delete VariableDie; @@ -1523,7 +1519,8 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { } else { MachineLocation Location; unsigned FrameReg; - int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg); + int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), + FrameReg); Location.set(FrameReg, Offset); if (VD.hasComplexAddress()) @@ -1576,10 +1573,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { else ScopeDIE = updateSubprogramScopeDIE(DS.getNode()); } - else { + else ScopeDIE = constructLexicalScopeDIE(Scope); - if (!ScopeDIE) return NULL; - } + if (!ScopeDIE) return NULL; // Add variables to scope. const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables(); @@ -1608,7 +1604,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName) { +unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){ unsigned DId; StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName); if (DI != DirectoryIdMap.end()) { @@ -1666,15 +1662,19 @@ void DwarfDebug::constructCompileUnit(MDNode *N) { unsigned ID = GetOrCreateSourceID(Dir, FN); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - // FIXME: Why getting the delta between two identical labels?? 
- addSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - getTempLabel("section_line"), getTempLabel("section_line"), - false); addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, DIUnit.getProducer()); addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, DIUnit.getLanguage()); addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); + addLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + getTempLabel("text_begin")); + addLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + getTempLabel("text_end")); + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. It is always zero when only one + // compile unit is emitted in one object file. + addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); @@ -1717,13 +1717,13 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { DIDescriptor GVContext = DI_GV.getContext(); // Do not create specification DIE if context is either compile unit // or a subprogram. - if (DI_GV.isDefinition() && !GVContext.isCompileUnit() - && !GVContext.isFile() && !GVContext.isSubprogram()) { + if (DI_GV.isDefinition() && !GVContext.isCompileUnit() && + !GVContext.isFile() && !GVContext.isSubprogram()) { // Create specification DIE. 
DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDie); - DIEBlock *Block = new DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Block, 0, dwarf::DW_FORM_udata, Asm->Mang->getSymbol(DI_GV.getGlobal())); @@ -1731,7 +1731,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); ModuleCU->addDie(VariableSpecDIE); } else { - DIEBlock *Block = new DIEBlock(); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Block, 0, dwarf::DW_FORM_udata, Asm->Mang->getSymbol(DI_GV.getGlobal())); @@ -1745,7 +1745,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { DIType GTy = DI_GV.getType(); if (GTy.isCompositeType() && !GTy.getName().empty()) { DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode()); - assert (Entry && "Missing global type!"); + assert(Entry && "Missing global type!"); ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry()); } return; @@ -1783,12 +1783,11 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) { void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) { this->M = M; - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - if (!MAI->doesSupportDebugInformation()) return; + TimeRegion Timer(DebugTimer); + DebugInfoFinder DbgFinder; DbgFinder.processModule(*M); @@ -1836,9 +1835,6 @@ void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) { // Emit initial sections emitInitial(); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); } /// endModule - Emit all Dwarf sections that should come after the content. 
@@ -1847,8 +1843,7 @@ void DwarfDebug::endModule() { if (!ModuleCU) return; - if (TimePassesIsEnabled) - DebugTimer->startTimer(); + TimeRegion Timer(DebugTimer); // Attach DW_AT_inline attribute with inlined subprogram DIEs. for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), @@ -1871,7 +1866,7 @@ void DwarfDebug::endModule() { if (!NDie) continue; addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); // FIXME - This is not the correct approach. - // addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + //addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie } // Standard sections final addresses. @@ -1932,9 +1927,6 @@ void DwarfDebug::endModule() { delete ModuleCU; ModuleCU = NULL; // Reset for the next Module, if any. - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); } /// findAbstractVariable - Find abstract variable, if any, associated with Var. @@ -1971,10 +1963,11 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, if (!Scope) return NULL; - AbsDbgVariable = new DbgVariable(Var, MI, + AbsDbgVariable = new DbgVariable(Var, MI, NULL /* No more-abstract variable*/); Scope->addVariable(AbsDbgVariable); AbstractVariables[Var.getNode()] = AbsDbgVariable; + DbgValueStartMap[MI] = AbsDbgVariable; return AbsDbgVariable; } @@ -2010,16 +2003,19 @@ void DwarfDebug::collectVariableInfo() { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; - if (MInsn->getOpcode() != TargetOpcode::DBG_VALUE) + if (!MInsn->isDebugValue()) continue; + // FIXME : Lift this restriction. if (MInsn->getNumOperands() != 3) continue; - DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata())); + DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands() + - 1).getMetadata())); if (DV.getTag() == dwarf::DW_TAG_arg_variable) { // FIXME Handle inlined subroutine arguments. 
DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL); CurrentFnDbgScope->addVariable(ArgVar); + DbgValueStartMap[MInsn] = ArgVar; continue; } @@ -2034,19 +2030,54 @@ void DwarfDebug::collectVariableInfo() { if (!Scope) continue; - DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, - ScopeLoc); + DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, ScopeLoc); DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable); + DbgValueStartMap[MInsn] = RegVar; Scope->addVariable(RegVar); } } } -/// beginScope - Process beginning of a scope starting at Label. -void DwarfDebug::beginScope(const MachineInstr *MI, MCSymbol *Label) { +/// beginScope - Process beginning of a scope. +void DwarfDebug::beginScope(const MachineInstr *MI) { + // Check location. + DebugLoc DL = MI->getDebugLoc(); + if (DL.isUnknown()) + return; + DILocation DILoc = MF->getDILocation(DL); + if (!DILoc.getScope().Verify()) + return; + + // Check and update last known location info. + if(DILoc.getNode() == PrevDILoc) + return; + + // DBG_VALUE instruction establishes new value. + if (MI->isDebugValue()) { + DenseMap<const MachineInstr *, DbgVariable *>::iterator DI + = DbgValueStartMap.find(MI); + if (DI != DbgValueStartMap.end()) { + MCSymbol *Label = recordSourceLine(DILoc.getLineNumber(), + DILoc.getColumnNumber(), + DILoc.getScope().getNode()); + PrevDILoc = DILoc.getNode(); + DI->second->setDbgValueLabel(Label); + } + return; + } + + // Emit a label to indicate location change. This is used for line + // table even if this instruction does start a new scope. + MCSymbol *Label = recordSourceLine(DILoc.getLineNumber(), + DILoc.getColumnNumber(), + DILoc.getScope().getNode()); + PrevDILoc = DILoc.getNode(); + + // update DbgScope if this instruction starts a new scope. 
InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); if (I == DbgScopeBeginMap.end()) return; + ScopeVector &SD = I->second; for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end(); SDI != SDE; ++SDI) @@ -2055,6 +2086,19 @@ void DwarfDebug::beginScope(const MachineInstr *MI, MCSymbol *Label) { /// endScope - Process end of a scope. void DwarfDebug::endScope(const MachineInstr *MI) { + // Ignore DBG_VALUE instruction. + if (MI->isDebugValue()) + return; + + // Check location. + DebugLoc DL = MI->getDebugLoc(); + if (DL.isUnknown()) + return; + DILocation DILoc = MF->getDILocation(DL); + if (!DILoc.getScope().Verify()) + return; + + // Emit a label and update DbgScope if this instruction ends a scope. InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); if (I == DbgScopeEndMap.end()) return; @@ -2094,7 +2138,7 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { } /// extractScopeInformation - Scan machine instructions in this function -/// and collect DbgScopes. Return true, if atleast one scope was found. +/// and collect DbgScopes. Return true, if at least one scope was found. bool DwarfDebug::extractScopeInformation() { // If scope information was extracted using .dbg intrinsics then there is not // any need to extract these information by scanning each instruction. @@ -2110,12 +2154,13 @@ bool DwarfDebug::extractScopeInformation() { II != IE; ++II) { const MachineInstr *MInsn = II; // FIXME : Remove DBG_VALUE check. - if (MInsn->getOpcode() == TargetOpcode::DBG_VALUE) continue; + if (MInsn->isDebugValue()) continue; MIIndexMap[MInsn] = MIIndex++; DebugLoc DL = MInsn->getDebugLoc(); if (DL.isUnknown()) continue; DILocation DLT = MF->getDILocation(DL); DIScope DLTScope = DLT.getScope(); + if (!DLTScope.getNode()) continue; // There is no need to create another DIE for compile unit. For all // other scopes, create one DbgScope now. This will be translated // into a scope DIE at the end. 
@@ -2132,11 +2177,12 @@ bool DwarfDebug::extractScopeInformation() { II != IE; ++II) { const MachineInstr *MInsn = II; // FIXME : Remove DBG_VALUE check. - if (MInsn->getOpcode() == TargetOpcode::DBG_VALUE) continue; + if (MInsn->isDebugValue()) continue; DebugLoc DL = MInsn->getDebugLoc(); if (DL.isUnknown()) continue; DILocation DLT = MF->getDILocation(DL); DIScope DLTScope = DLT.getScope(); + if (!DLTScope.getNode()) continue; // There is no need to create another DIE for compile unit. For all // other scopes, create one DbgScope now. This will be translated // into a scope DIE at the end. @@ -2159,7 +2205,7 @@ bool DwarfDebug::extractScopeInformation() { SmallVector<DbgScope *, 4> WorkList; WorkList.push_back(CurrentFnDbgScope); while (!WorkList.empty()) { - DbgScope *S = WorkList.back(); WorkList.pop_back(); + DbgScope *S = WorkList.pop_back_val(); const SmallVector<DbgScope *, 4> &Children = S->getScopes(); if (!Children.empty()) @@ -2170,7 +2216,7 @@ bool DwarfDebug::extractScopeInformation() { if (S->isAbstractScope()) continue; const MachineInstr *MI = S->getFirstInsn(); - assert (MI && "DbgScope does not have first instruction!"); + assert(MI && "DbgScope does not have first instruction!"); InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI); if (IDI != DbgScopeBeginMap.end()) @@ -2179,7 +2225,7 @@ bool DwarfDebug::extractScopeInformation() { DbgScopeBeginMap[MI].push_back(S); MI = S->getLastInsn(); - assert (MI && "DbgScope does not have last instruction!"); + assert(MI && "DbgScope does not have last instruction!"); IDI = DbgScopeEndMap.find(MI); if (IDI != DbgScopeEndMap.end()) IDI->second.push_back(S); @@ -2196,12 +2242,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { this->MF = MF; if (!ShouldEmitDwarfDebug()) return; - - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - if (!extractScopeInformation()) return; + + TimeRegion Timer(DebugTimer); collectVariableInfo(); @@ -2225,20 +2269,15 @@ void 
DwarfDebug::beginFunction(const MachineFunction *MF) { recordSourceLine(Line, Col, DLT.getScope().getNode()); } - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); } /// endFunction - Gather and emit post-function debug information. /// void DwarfDebug::endFunction(const MachineFunction *MF) { if (!ShouldEmitDwarfDebug()) return; - - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - if (DbgScopeMap.empty()) - return; + if (DbgScopeMap.empty()) return; + + TimeRegion Timer(DebugTimer); if (CurrentFnDbgScope) { // Define end label for subprogram. @@ -2271,14 +2310,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DeleteContainerSeconds(DbgScopeMap); DbgScopeBeginMap.clear(); DbgScopeEndMap.clear(); + DbgValueStartMap.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); AbstractVariables.clear(); Lines.clear(); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); } /// recordSourceLine - Register a source line with debug info. Returns the @@ -2288,8 +2325,7 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { if (!MMI) return 0; - if (TimePassesIsEnabled) - DebugTimer->startTimer(); + TimeRegion Timer(DebugTimer); StringRef Dir; StringRef Fn; @@ -2314,9 +2350,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { MCSymbol *Label = MMI->getContext().CreateTempSymbol(); Lines.push_back(SrcLineInfo(Line, Col, Src, Label)); - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - Asm->OutStreamer.EmitLabel(Label); return Label; } @@ -2328,15 +2361,8 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { /// well. 
unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, const std::string &FileName) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - unsigned SrcId = GetOrCreateSourceID(DirName.c_str(), FileName.c_str()); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return SrcId; + TimeRegion Timer(DebugTimer); + return GetOrCreateSourceID(DirName.c_str(), FileName.c_str()); } //===----------------------------------------------------------------------===// @@ -2351,7 +2377,8 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { const std::vector<DIE *> &Children = Die->getChildren(); // If not last sibling and has children then add sibling offset attribute. - if (!Last && !Children.empty()) Die->addSiblingOffset(); + if (!Last && !Children.empty()) + Die->addSiblingOffset(DIEValueAllocator); // Record the abbreviation. assignAbbrevNumber(Die->getAbbrev()); @@ -2465,7 +2492,7 @@ void DwarfDebug::emitDIE(DIE *Die) { dwarf::TagString(Abbrev->getTag())); EmitULEB128(AbbrevNumber); - SmallVector<DIEValue*, 32> &Values = Die->getValues(); + const SmallVector<DIEValue*, 32> &Values = Die->getValues(); const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData(); // Emit the DIE attribute values. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index d6634e1..ad6b0c2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineLocation.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -98,9 +99,11 @@ class DwarfDebug : public DwarfPrinter { /// Lines - List of source line correspondence. std::vector<SrcLineInfo> Lines; - /// DIEValues - A list of all the unique values in use. 
- /// - std::vector<DIEValue *> DIEValues; + /// DIEBlocks - A list of all the DIEBlocks in use. + std::vector<DIEBlock *> DIEBlocks; + + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; /// StringPool - A String->Symbol mapping of strings used by indirect /// references. @@ -141,12 +144,21 @@ class DwarfDebug : public DwarfPrinter { /// AbstractScopes - Tracks the abstract scopes a module. These scopes are /// not included DbgScopeMap. AbstractScopes owns its DbgScope*s. DenseMap<MDNode *, DbgScope *> AbstractScopes; + + /// AbstractScopesList - Tracks abstract scopes constructed while processing + /// a function. This list is cleared during endFunction(). SmallVector<DbgScope *, 4>AbstractScopesList; /// AbstractVariables - Collection on abstract variables. Owned by the /// DbgScopes in AbstractScopes. DenseMap<MDNode *, DbgVariable *> AbstractVariables; + /// DbgValueStartMap - Tracks starting scope of variable DIEs. + /// If the scope of an object begins sometime after the low pc value for the + /// scope most closely enclosing the object, the object entry may have a + /// DW_AT_start_scope attribute. + DenseMap<const MachineInstr *, DbgVariable *> DbgValueStartMap; + /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; @@ -181,6 +193,10 @@ class DwarfDebug : public DwarfPrinter { /// function. DenseMap<CompileUnit *, unsigned> CompileUnitOffsets; + /// Previous instruction's location information. This is used to determine + /// label location to indicate scope boundries in dwarf debug info. + mutable const MDNode *PrevDILoc; + /// DebugTimer - Timer for the Dwarf debug writer. 
Timer *DebugTimer; @@ -250,12 +266,6 @@ class DwarfDebug : public DwarfPrinter { void addLabel(DIE *Die, unsigned Attribute, unsigned Form, const MCSymbol *Label); - /// addSectionOffset - Add a section offset label attribute data and value. - /// - void addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label, const MCSymbol *Section, - bool isEH = false); - /// addDelta - Add a label delta attribute data and value. /// void addDelta(DIE *Die, unsigned Attribute, unsigned Form, @@ -545,8 +555,8 @@ public: /// collectVariableInfo - Populate DbgScope entries with variables' info. void collectVariableInfo(); - /// beginScope - Process beginning of a scope starting at Label. - void beginScope(const MachineInstr *MI, MCSymbol *Label); + /// beginScope - Process beginning of a scope. + void beginScope(const MachineInstr *MI); /// endScope - Prcess end of a scope. void endScope(const MachineInstr *MI); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 4946b4c..8b616b0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -419,23 +419,24 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); - if (MO.isGlobal()) { - if (Function *F = dyn_cast<Function>(MO.getGlobal())) { - if (SawFunc) { - // Be conservative. If we have more than one function operand for this - // call, then we can't make the assumption that it's the callee and - // not a parameter to the call. - // - // FIXME: Determine if there's a way to say that `F' is the callee or - // parameter. - MarkedNoUnwind = false; - break; - } - - MarkedNoUnwind = F->doesNotThrow(); - SawFunc = true; - } + if (!MO.isGlobal()) continue; + + Function *F = dyn_cast<Function>(MO.getGlobal()); + if (F == 0) continue; + + if (SawFunc) { + // Be conservative. 
If we have more than one function operand for this + // call, then we can't make the assumption that it's the callee and + // not a parameter to the call. + // + // FIXME: Determine if there's a way to say that `F' is the callee or + // parameter. + MarkedNoUnwind = false; + break; } + + MarkedNoUnwind = F->doesNotThrow(); + SawFunc = true; } return MarkedNoUnwind; @@ -504,7 +505,10 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, LastLabel = LandingPad->EndLabels[P.RangeIndex]; assert(BeginLabel && LastLabel && "Invalid landing pad!"); - if (LandingPad->LandingPadLabel) { + if (!LandingPad->LandingPadLabel) { + // Create a gap. + PreviousIsInvoke = false; + } else { // This try-range is for an invoke. CallSiteEntry Site = { BeginLabel, @@ -536,9 +540,6 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, CallSites[SiteNo - 1] = Site; } PreviousIsInvoke = true; - } else { - // Create a gap. - PreviousIsInvoke = false; } } } @@ -885,8 +886,7 @@ void DwarfException::EndModule() { if (!shouldEmitMovesModule && !shouldEmitTableModule) return; - if (TimePassesIsEnabled) - ExceptionTimer->startTimer(); + TimeRegion Timer(ExceptionTimer); const std::vector<Function *> Personalities = MMI->getPersonalities(); @@ -896,9 +896,6 @@ void DwarfException::EndModule() { for (std::vector<FunctionEHFrameInfo>::iterator I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) EmitFDE(*I); - - if (TimePassesIsEnabled) - ExceptionTimer->stopTimer(); } /// BeginFunction - Gather pre-function exception information. 
Assumes it's @@ -906,9 +903,7 @@ void DwarfException::EndModule() { void DwarfException::BeginFunction(const MachineFunction *MF) { if (!MMI || !MAI->doesSupportExceptionHandling()) return; - if (TimePassesIsEnabled) - ExceptionTimer->startTimer(); - + TimeRegion Timer(ExceptionTimer); this->MF = MF; shouldEmitTable = shouldEmitMoves = false; @@ -924,9 +919,6 @@ void DwarfException::BeginFunction(const MachineFunction *MF) { shouldEmitTableModule |= shouldEmitTable; shouldEmitMovesModule |= shouldEmitMoves; - - if (TimePassesIsEnabled) - ExceptionTimer->stopTimer(); } /// EndFunction - Gather and emit post-function exception information. @@ -934,9 +926,7 @@ void DwarfException::BeginFunction(const MachineFunction *MF) { void DwarfException::EndFunction() { if (!shouldEmitMoves && !shouldEmitTable) return; - if (TimePassesIsEnabled) - ExceptionTimer->startTimer(); - + TimeRegion Timer(ExceptionTimer); Asm->OutStreamer.EmitLabel(getDWLabel("eh_func_end", SubprogramCount)); // Record if this personality index uses a landing pad. @@ -961,7 +951,4 @@ void DwarfException::EndFunction() { !MMI->getLandingPads().empty(), MMI->getFrameMoves(), MF->getFunction())); - - if (TimePassesIsEnabled) - ExceptionTimer->stopTimer(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index e212696..17eb2e8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -45,14 +45,14 @@ MCSymbol *DwarfPrinter::getDWLabel(const char *Name, unsigned ID) const { //assert(ID && "Should use getTempLabel if no ID"); if (ID == 0) return getTempLabel(Name); - return Asm->OutContext.GetOrCreateTemporarySymbol + return Asm->OutContext.GetOrCreateSymbol (Twine(MAI->getPrivateGlobalPrefix()) + Twine(Name) + Twine(ID)); } /// getTempLabel - Return the MCSymbol corresponding to the assembler temporary /// label with the specified name. 
MCSymbol *DwarfPrinter::getTempLabel(const char *Name) const { - return Asm->OutContext.GetOrCreateTemporarySymbol + return Asm->OutContext.GetOrCreateSymbol (Twine(MAI->getPrivateGlobalPrefix()) + Name); } diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index 9fd4c44..a2d7ab1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -73,27 +73,14 @@ void DwarfWriter::EndFunction(const MachineFunction *MF) { MMI->EndFunction(); } -/// RecordSourceLine - Register a source line with debug info. Returns the -/// unique label that was emitted and which provides correspondence to -/// the source line list. -MCSymbol *DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, - MDNode *Scope) { - return DD->recordSourceLine(Line, Col, Scope); -} - -/// getRecordSourceLineCount - Count source lines. -unsigned DwarfWriter::getRecordSourceLineCount() { - return DD->getSourceLineCount(); -} - /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should /// be emitted. bool DwarfWriter::ShouldEmitDwarfDebug() const { return DD && DD->ShouldEmitDwarfDebug(); } -void DwarfWriter::BeginScope(const MachineInstr *MI, MCSymbol *L) { - DD->beginScope(MI, L); +void DwarfWriter::BeginScope(const MachineInstr *MI) { + DD->beginScope(MI); } void DwarfWriter::EndScope(const MachineInstr *MI) { DD->endScope(MI); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 151e9cd..8f51940 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -972,15 +972,21 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to // optimize branches that branch to either a return block or an assert block // into a fallthrough to the return. 
- if (MBB1->empty() || MBB2->empty()) return false; + if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false; // If there is a clear successor ordering we make sure that one block // will fall through to the next if (MBB1->isSuccessor(MBB2)) return true; if (MBB2->isSuccessor(MBB1)) return false; - MachineInstr *MBB1I = --MBB1->end(); - MachineInstr *MBB2I = --MBB2->end(); + // Neither block consists entirely of debug info (per IsEmptyBlock check), + // so we needn't test for falling off the beginning here. + MachineBasicBlock::iterator MBB1I = --MBB1->end(); + while (MBB1I->isDebugValue()) + --MBB1I; + MachineBasicBlock::iterator MBB2I = --MBB2->end(); + while (MBB2I->isDebugValue()) + --MBB2I; return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); } diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 39fc85e..8bae9ed 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -8,19 +8,19 @@ //===----------------------------------------------------------------------===// // // This pass mulches exception handling code into a form adapted to code -// generation. Required if using dwarf exception handling. +// generation. Required if using dwarf exception handling. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "dwarfehprepare" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -40,6 +40,15 @@ namespace { // The eh.exception intrinsic. Function *ExceptionValueIntrinsic; + // The eh.selector intrinsic. 
+ Function *SelectorIntrinsic; + + // _Unwind_Resume_or_Rethrow call. + Constant *URoR; + + // The EH language-specific catch-all type. + GlobalVariable *EHCatchAllValue; + // _Unwind_Resume or the target equivalent. Constant *RewindFunction; @@ -67,18 +76,88 @@ namespace { Instruction *CreateValueLoad(BasicBlock *BB); /// CreateReadOfExceptionValue - Return the result of the eh.exception - /// intrinsic by calling the intrinsic if in a landing pad, or loading - /// it from the exception value variable otherwise. + /// intrinsic by calling the intrinsic if in a landing pad, or loading it + /// from the exception value variable otherwise. Instruction *CreateReadOfExceptionValue(BasicBlock *BB) { return LandingPads.count(BB) ? CreateExceptionValueCall(BB) : CreateValueLoad(BB); } + /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still + /// use the ".llvm.eh.catch.all.value" call need to convert to using it's + /// initializer instead. + bool CleanupSelectors(); + + /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. + void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels); + + /// FindAllURoRInvokes - Find all URoR invokes in the function. + void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes); + + /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" + /// calls. The "unwind" part of these invokes jump to a landing pad within + /// the current function. This is a candidate to merge the selector + /// associated with the URoR invoke with the one from the URoR's landing + /// pad. + bool HandleURoRInvokes(); + + /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated + /// with the eh.exception call. This recursively looks past instructions + /// which don't change the EH pointer value, like casts or PHI nodes. 
+ bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, + SmallPtrSet<IntrinsicInst*, 8> &SelCalls); + + /// DoMem2RegPromotion - Take an alloca call and promote it from memory to a + /// register. + bool DoMem2RegPromotion(Value *V) { + AllocaInst *AI = dyn_cast<AllocaInst>(V); + if (!AI || !isAllocaPromotable(AI)) return false; + + // Turn the alloca into a register. + std::vector<AllocaInst*> Allocas(1, AI); + PromoteMemToReg(Allocas, *DT, *DF); + return true; + } + + /// PromoteStoreInst - Perform Mem2Reg on a StoreInst. + bool PromoteStoreInst(StoreInst *SI) { + if (!SI || !DT || !DF) return false; + if (DoMem2RegPromotion(SI->getOperand(1))) + return true; + return false; + } + + /// PromoteEHPtrStore - Promote the storing of an EH pointer into a + /// register. This should get rid of the store and subsequent loads. + bool PromoteEHPtrStore(IntrinsicInst *II) { + if (!DT || !DF) return false; + + bool Changed = false; + StoreInst *SI; + + while (1) { + SI = 0; + for (Value::use_iterator + I = II->use_begin(), E = II->use_end(); I != E; ++I) { + SI = dyn_cast<StoreInst>(I); + if (SI) break; + } + + if (!PromoteStoreInst(SI)) + break; + + Changed = true; + } + + return false; + } + public: static char ID; // Pass identification, replacement for typeid. DwarfEHPrepare(const TargetLowering *tli, bool fast) : FunctionPass(&ID), TLI(tli), CompileFast(fast), - ExceptionValueIntrinsic(0), RewindFunction(0) {} + ExceptionValueIntrinsic(0), SelectorIntrinsic(0), + URoR(0), EHCatchAllValue(0), RewindFunction(0) {} virtual bool runOnFunction(Function &Fn); @@ -105,6 +184,233 @@ FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { return new DwarfEHPrepare(tli, fast); } +/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. 
+void DwarfEHPrepare:: +FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { + for (Value::use_iterator + I = SelectorIntrinsic->use_begin(), + E = SelectorIntrinsic->use_end(); I != E; ++I) { + IntrinsicInst *SI = cast<IntrinsicInst>(I); + if (!SI || SI->getParent()->getParent() != F) continue; + + unsigned NumOps = SI->getNumOperands(); + if (NumOps > 4) continue; + bool IsCleanUp = (NumOps == 3); + + if (!IsCleanUp) + if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getOperand(3))) + IsCleanUp = (CI->getZExtValue() == 0); + + if (IsCleanUp) + Sels.insert(SI); + } +} + +/// FindAllURoRInvokes - Find all URoR invokes in the function. +void DwarfEHPrepare:: +FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { + for (Value::use_iterator + I = URoR->use_begin(), + E = URoR->use_end(); I != E; ++I) { + if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + URoRInvokes.insert(II); + } +} + +/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use +/// the ".llvm.eh.catch.all.value" call need to convert to using it's +/// initializer instead. +bool DwarfEHPrepare::CleanupSelectors() { + if (!EHCatchAllValue) return false; + + if (!SelectorIntrinsic) { + SelectorIntrinsic = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); + if (!SelectorIntrinsic) return false; + } + + bool Changed = false; + for (Value::use_iterator + I = SelectorIntrinsic->use_begin(), + E = SelectorIntrinsic->use_end(); I != E; ++I) { + IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(I); + if (!Sel || Sel->getParent()->getParent() != F) continue; + + // Index of the ".llvm.eh.catch.all.value" variable. 
+ unsigned OpIdx = Sel->getNumOperands() - 1; + GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getOperand(OpIdx)); + if (GV != EHCatchAllValue) continue; + Sel->setOperand(OpIdx, EHCatchAllValue->getInitializer()); + Changed = true; + } + + return Changed; +} + +/// FindSelectorAndURoR - Find the eh.selector call associated with the +/// eh.exception call. And indicate if there is a URoR "invoke" associated with +/// the eh.exception call. This recursively looks past instructions which don't +/// change the EH pointer value, like casts or PHI nodes. +bool +DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, + SmallPtrSet<IntrinsicInst*, 8> &SelCalls) { + SmallPtrSet<PHINode*, 32> SeenPHIs; + bool Changed = false; + + restart: + for (Value::use_iterator + I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) { + Instruction *II = dyn_cast<Instruction>(I); + if (!II || II->getParent()->getParent() != F) continue; + + if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) { + if (Sel->getIntrinsicID() == Intrinsic::eh_selector) + SelCalls.insert(Sel); + } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) { + if (Invoke->getCalledFunction() == URoR) + URoRInvoke = true; + } else if (CastInst *CI = dyn_cast<CastInst>(II)) { + Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls); + } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) { + if (!PromoteStoreInst(SI)) continue; + Changed = true; + SeenPHIs.clear(); + goto restart; // Uses may have changed, restart loop. + } else if (PHINode *PN = dyn_cast<PHINode>(II)) { + if (SeenPHIs.insert(PN)) + // Don't process a PHI node more than once. + Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls); + } + } + + return Changed; +} + +/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The +/// "unwind" part of these invokes jump to a landing pad within the current +/// function. 
This is a candidate to merge the selector associated with the URoR +/// invoke with the one from the URoR's landing pad. +bool DwarfEHPrepare::HandleURoRInvokes() { + if (!DT) return CleanupSelectors(); // We require DominatorTree information. + + if (!EHCatchAllValue) { + EHCatchAllValue = + F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value"); + if (!EHCatchAllValue) return false; + } + + if (!SelectorIntrinsic) { + SelectorIntrinsic = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); + if (!SelectorIntrinsic) return false; + } + + if (!URoR) { + URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); + if (!URoR) return CleanupSelectors(); + } + + SmallPtrSet<IntrinsicInst*, 32> Sels; + SmallPtrSet<InvokeInst*, 32> URoRInvokes; + FindAllCleanupSelectors(Sels); + FindAllURoRInvokes(URoRInvokes); + + SmallPtrSet<IntrinsicInst*, 32> SelsToConvert; + + for (SmallPtrSet<IntrinsicInst*, 32>::iterator + SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) { + const BasicBlock *SelBB = (*SI)->getParent(); + for (SmallPtrSet<InvokeInst*, 32>::iterator + UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { + const BasicBlock *URoRBB = (*UI)->getParent(); + if (SelBB == URoRBB || DT->dominates(SelBB, URoRBB)) { + SelsToConvert.insert(*SI); + break; + } + } + } + + bool Changed = false; + + if (Sels.size() != SelsToConvert.size()) { + // If we haven't been able to convert all of the clean-up selectors, then + // loop through the slow way to see if they still need to be converted. 
+ if (!ExceptionValueIntrinsic) { + ExceptionValueIntrinsic = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); + if (!ExceptionValueIntrinsic) return CleanupSelectors(); + } + + for (Value::use_iterator + I = ExceptionValueIntrinsic->use_begin(), + E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { + IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(I); + if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; + + Changed |= PromoteEHPtrStore(EHPtr); + + bool URoRInvoke = false; + SmallPtrSet<IntrinsicInst*, 8> SelCalls; + Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls); + + if (URoRInvoke) { + // This EH pointer is being used by an invoke of an URoR instruction and + // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we + // need to convert it to a 'catch-all'. + for (SmallPtrSet<IntrinsicInst*, 8>::iterator + SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) { + IntrinsicInst *II = *SI; + unsigned NumOps = II->getNumOperands(); + + if (NumOps <= 4) { + bool IsCleanUp = (NumOps == 3); + + if (!IsCleanUp) + if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(3))) + IsCleanUp = (CI->getZExtValue() == 0); + + if (IsCleanUp) + SelsToConvert.insert(II); + } + } + } + } + } + + if (!SelsToConvert.empty()) { + // Convert all clean-up eh.selectors, which are associated with "invokes" of + // URoR calls, into catch-all eh.selectors. + Changed = true; + + for (SmallPtrSet<IntrinsicInst*, 8>::iterator + SI = SelsToConvert.begin(), SE = SelsToConvert.end(); + SI != SE; ++SI) { + IntrinsicInst *II = *SI; + SmallVector<Value*, 8> Args; + + // Use the exception object pointer and the personality function + // from the original selector. + Args.push_back(II->getOperand(1)); // Exception object pointer. + Args.push_back(II->getOperand(2)); // Personality function. + Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. 
+ + CallInst *NewSelector = + CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(), + "eh.sel.catch.all", II); + + NewSelector->setTailCall(II->isTailCall()); + NewSelector->setAttributes(II->getAttributes()); + NewSelector->setCallingConv(II->getCallingConv()); + + II->replaceAllUsesWith(NewSelector); + II->eraseFromParent(); + } + } + + Changed |= CleanupSelectors(); + return Changed; +} + /// NormalizeLandingPads - Normalize and discover landing pads, noting them /// in the LandingPads set. A landing pad is normal if the only CFG edges /// that end at it are unwind edges from invoke instructions. If we inlined @@ -422,6 +728,8 @@ bool DwarfEHPrepare::runOnFunction(Function &Fn) { if (!CompileFast) Changed |= PromoteStackTemporaries(); + Changed |= HandleURoRInvokes(); + LandingPads.clear(); return Changed; diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index e207f60..025ad05 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -303,9 +303,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, // otherwise mark it as ~1U so it can be nuked later. if (ValNo->id == getNumValNums()-1) { do { - VNInfo *VNI = valnos.back(); valnos.pop_back(); - VNI->~VNInfo(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { ValNo->setIsUnused(true); @@ -351,9 +349,7 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { // otherwise mark it as ~1U so it can be nuked later. if (ValNo->id == getNumValNums()-1) { do { - VNInfo *VNI = valnos.back(); valnos.pop_back(); - VNI->~VNInfo(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { ValNo->setIsUnused(true); @@ -579,9 +575,7 @@ void LiveInterval::MergeValueInAsValue( // mark it as ~1U so it can be nuked later. 
if (V1->id == getNumValNums()-1) { do { - VNInfo *VNI = valnos.back(); valnos.pop_back(); - VNI->~VNInfo(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { V1->setIsUnused(true); @@ -597,7 +591,7 @@ void LiveInterval::MergeValueInAsValue( /// used with an unknown definition value. void LiveInterval::MergeInClobberRanges(LiveIntervals &li_, const LiveInterval &Clobbers, - BumpPtrAllocator &VNInfoAllocator) { + VNInfo::Allocator &VNInfoAllocator) { if (Clobbers.empty()) return; DenseMap<VNInfo*, VNInfo*> ValNoMaps; @@ -658,14 +652,13 @@ void LiveInterval::MergeInClobberRanges(LiveIntervals &li_, if (UnusedValNo) { // Delete the last unused val#. valnos.pop_back(); - UnusedValNo->~VNInfo(); } } void LiveInterval::MergeInClobberRange(LiveIntervals &li_, SlotIndex Start, SlotIndex End, - BumpPtrAllocator &VNInfoAllocator) { + VNInfo::Allocator &VNInfoAllocator) { // Find a value # to use for the clobber ranges. If there is already a value# // for unknown values, use it. VNInfo *ClobberValNo = @@ -749,9 +742,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { // ~1U so it can be nuked later. 
if (V1->id == getNumValNums()-1) { do { - VNInfo *VNI = valnos.back(); valnos.pop_back(); - VNI->~VNInfo(); } while (valnos.back()->isUnused()); } else { V1->setIsUnused(true); @@ -762,7 +753,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { void LiveInterval::Copy(const LiveInterval &RHS, MachineRegisterInfo *MRI, - BumpPtrAllocator &VNInfoAllocator) { + VNInfo::Allocator &VNInfoAllocator) { ranges.clear(); valnos.clear(); std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index b3e9216..23cff07 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -91,7 +91,7 @@ void LiveIntervals::releaseMemory() { r2iMap_.clear(); // Release VNInfo memroy regions after all VNInfo objects are dtor'd. - VNInfoAllocator.Reset(); + VNInfoAllocator.DestroyAll(); while (!CloneMIs.empty()) { MachineInstr *MI = CloneMIs.back(); CloneMIs.pop_back(); @@ -819,8 +819,9 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, unsigned ImpUse = getReMatImplicitUse(li, MI); if (ImpUse) { const LiveInterval &ImpLi = getInterval(ImpUse); - for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg), - re = mri_->use_end(); ri != re; ++ri) { + for (MachineRegisterInfo::use_nodbg_iterator + ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end(); + ri != re; ++ri) { MachineInstr *UseMI = &*ri; SlotIndex UseIdx = getInstructionIndex(UseMI); if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo) @@ -1052,7 +1053,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // all of its uses are rematerialized, simply delete it. 
if (MI == ReMatOrigDefMI && CanDelete) { DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " - << MI << '\n'); + << *MI << '\n'); RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); @@ -1520,6 +1521,12 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, MachineOperand &O = ri.getOperand(); MachineInstr *MI = &*ri; ++ri; + if (MI->isDebugValue()) { + // Remove debug info for now. + O.setReg(0U); + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + continue; + } if (O.isDef()) { assert(MI->isImplicitDef() && "Register def was not rewritten?"); @@ -2012,6 +2019,8 @@ unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); MachineInstr *MI = O.getParent(); + if (MI->isDebugValue()) + continue; SlotIndex Index = getInstructionIndex(MI); if (pli.liveAt(Index)) ++NumConflicts; @@ -2052,7 +2061,7 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); MachineInstr *MI = O.getParent(); - if (SeenMIs.count(MI)) + if (MI->isDebugValue() || SeenMIs.count(MI)) continue; SeenMIs.insert(MI); SlotIndex Index = getInstructionIndex(MI); diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index d2f3775..798b9b9 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -36,7 +36,7 @@ void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { void LiveStacks::releaseMemory() { // Release VNInfo memroy regions after all VNInfo objects are dtor'd. 
- VNInfoAllocator.Reset(); + VNInfoAllocator.DestroyAll(); S2IMap.clear(); S2RCMap.clear(); } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 519990e..ca8ecff 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -556,17 +556,21 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { if (MI->isPHI()) NumOperandsToProcess = 1; + // Clear kill and dead markers. LV will recompute them. SmallVector<unsigned, 4> UseRegs; SmallVector<unsigned, 4> DefRegs; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { - const MachineOperand &MO = MI->getOperand(i); + MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.getReg() == 0) continue; unsigned MOReg = MO.getReg(); - if (MO.isUse()) + if (MO.isUse()) { + MO.setIsKill(false); UseRegs.push_back(MOReg); - if (MO.isDef()) + } else /*MO.isDef()*/ { + MO.setIsDead(false); DefRegs.push_back(MOReg); + } } // Process all uses. diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index fc8ae5f..bd0ccb4 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -23,6 +23,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" @@ -45,9 +46,9 @@ MCSymbol *MachineBasicBlock::getSymbol() const { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); - return Ctx.GetOrCreateTemporarySymbol(Twine(Prefix) + "BB" + - Twine(MF->getFunctionNumber()) + "_" + - Twine(getNumber())); + return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + "_" + + Twine(getNumber())); } @@ -459,54 +460,41 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, // conditional branch followed by 
an unconditional branch. DestA is the // 'true' destination and DestB is the 'false' destination. - bool MadeChange = false; - bool AddedFallThrough = false; + bool Changed = false; MachineFunction::iterator FallThru = llvm::next(MachineFunction::iterator(this)); - - if (isCond) { - // If this block ends with a conditional branch that falls through to its - // successor, set DestB as the successor. - if (DestB == 0 && FallThru != getParent()->end()) { + + if (DestA == 0 && DestB == 0) { + // Block falls through to successor. + DestA = FallThru; + DestB = FallThru; + } else if (DestA != 0 && DestB == 0) { + if (isCond) + // Block ends in conditional jump that falls through to successor. DestB = FallThru; - AddedFallThrough = true; - } } else { - // If this is an unconditional branch with no explicit dest, it must just be - // a fallthrough into DestA. - if (DestA == 0 && FallThru != getParent()->end()) { - DestA = FallThru; - AddedFallThrough = true; - } + assert(DestA && DestB && isCond && + "CFG in a bad state. Cannot correct CFG edges"); } - + + // Remove superfluous edges. I.e., those which aren't destinations of this + // basic block, duplicate edges, or landing pads. + SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs; MachineBasicBlock::succ_iterator SI = succ_begin(); - MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; while (SI != succ_end()) { const MachineBasicBlock *MBB = *SI; - if (MBB == DestA) { - DestA = 0; - ++SI; - } else if (MBB == DestB) { - DestB = 0; - ++SI; - } else if (MBB->isLandingPad() && - MBB != OrigDestA && MBB != OrigDestB) { - ++SI; - } else { - // Otherwise, this is a superfluous edge, remove it. + if (!SeenMBBs.insert(MBB) || + (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) { + // This is a superfluous edge, remove it. 
SI = removeSuccessor(SI); - MadeChange = true; + Changed = true; + } else { + ++SI; } } - if (!AddedFallThrough) - assert(DestA == 0 && DestB == 0 && "MachineCFG is missing edges!"); - else if (isCond) - assert(DestA == 0 && "MachineCFG is missing edges!"); - - return MadeChange; + return Changed; } /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 91d3635..597d51d 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -117,17 +117,15 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator E) { unsigned LookAheadLeft = 5; - while (LookAheadLeft--) { + while (LookAheadLeft) { + // Skip over dbg_value's. + while (I != E && I->isDebugValue()) + ++I; + if (I == E) // Reached end of block, register is obviously dead. return true; - if (I->isDebugValue()) { - // These must not count against the limit. - ++LookAheadLeft; - ++I; - continue; - } bool SeenDef = false; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { const MachineOperand &MO = I->getOperand(i); @@ -143,6 +141,8 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg, // See a def of Reg (or an alias) before encountering any use, it's // trivially dead. return true; + + --LookAheadLeft; ++I; } return false; @@ -294,8 +294,12 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { bool FoundCSE = VNT.count(MI); if (!FoundCSE) { // Look for trivial copy coalescing opportunities. - if (PerformTrivialCoalescing(MI, MBB)) + if (PerformTrivialCoalescing(MI, MBB)) { + // After coalescing MI itself may become a copy. + if (isCopy(MI, TII)) + continue; FoundCSE = VNT.count(MI); + } } // FIXME: commute commutable instructions? 
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 5772b2f..f6cc71f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -460,9 +460,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; - if (isLinkerPrivate) - return Ctx.GetOrCreateSymbol(Name.str()); - return Ctx.GetOrCreateTemporarySymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name.str()); } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index af48e9e..ad4f01b 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -44,6 +44,10 @@ public: MMIAddrLabelMapCallbackPtr() : Map(0) {} MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {} + void setPtr(BasicBlock *BB) { + ValueHandleBase::operator=(BB); + } + void setMap(MMIAddrLabelMap *map) { Map = map; } virtual void deleted(); @@ -209,7 +213,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { // If New is not address taken, just move our symbol over to it. if (NewEntry.Symbols.isNull()) { - BBCallbacks[OldEntry.Index] = New; // Update the callback. + BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback. NewEntry = OldEntry; // Set New's entry. return; } diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp index acb6869..41fc204 100644 --- a/lib/CodeGen/OptimizeExts.cpp +++ b/lib/CodeGen/OptimizeExts.cpp @@ -73,6 +73,9 @@ FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); } /// the source, and if the source value is preserved as a sub-register of /// the result, then replace all reachable uses of the source with the subreg /// of the result. +/// Do not generate an EXTRACT that is used only in a debug use, as this +/// changes the code. 
Since this code does not currently share EXTRACTs, just +/// ignore all debug uses. bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs) { bool Changed = false; @@ -84,17 +87,17 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; - MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg); - if (++UI == MRI->use_end()) + MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); + if (++UI == MRI->use_nodbg_end()) // No other uses. return false; // Ok, the source has other uses. See if we can replace the other uses // with use of the result of the extension. SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; - UI = MRI->use_begin(DstReg); - for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; - ++UI) + UI = MRI->use_nodbg_begin(DstReg); + for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + UI != UE; ++UI) ReachedBBs.insert(UI->getParent()); bool ExtendLife = true; @@ -103,9 +106,9 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Uses that the result of the instruction can reach. SmallVector<MachineOperand*, 8> ExtendedUses; - UI = MRI->use_begin(SrcReg); - for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; - ++UI) { + UI = MRI->use_nodbg_begin(SrcReg); + for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; if (UseMI == MI) @@ -147,9 +150,9 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. 
- UI = MRI->use_begin(DstReg); - for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; - ++UI) + UI = MRI->use_nodbg_begin(DstReg); + for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + UI != UE; ++UI) if (UI->isPHI()) PHIBBs.insert(UI->getParent()); diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 8bbe0a7..f0057ce 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -74,7 +74,7 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { E = ImpDefs.end(); I != E; ++I) { MachineInstr *DefMI = *I; unsigned DefReg = DefMI->getOperand(0).getReg(); - if (MRI->use_empty(DefReg)) + if (MRI->use_nodbg_empty(DefReg)) DefMI->eraseFromParent(); } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 70e91aa..2d49beb 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -665,7 +665,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us /// ReconstructLiveInterval - Recompute a live interval from scratch. 
void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { - BumpPtrAllocator& Alloc = LIs->getVNInfoAllocator(); + VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator(); // Clear the old ranges and valnos; LI->clear(); diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 2c69065..0ef041e 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -118,8 +118,8 @@ namespace { bool isVirtRegModified(unsigned Reg) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size() - && "Illegal virtual register!"); + assert(Reg - TargetRegisterInfo::FirstVirtualRegister < + VirtRegModified.size() && "Illegal virtual register!"); return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; } @@ -135,15 +135,16 @@ namespace { if (PhysRegsUseOrder.empty() || PhysRegsUseOrder.back() == Reg) return; // Already most recently used - for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) - if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) { - unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle - PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); - // Add it to the end of the list - PhysRegsUseOrder.push_back(RegMatch); - if (RegMatch == Reg) - return; // Found an exact match, exit early - } + for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) { + unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle + if (!areRegsEqual(Reg, RegMatch)) continue; + + PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); + // Add it to the end of the list + PhysRegsUseOrder.push_back(RegMatch); + if (RegMatch == Reg) + return; // Found an exact match, exit early + } } public: @@ -267,7 +268,7 @@ int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - // Assign the slot... 
+ // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; return FrameIdx; } @@ -337,15 +338,19 @@ void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); - } else { - // If the selected register aliases any other registers, we must make - // sure that one of the aliases isn't alive. - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) - if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register. - PhysRegsUsed[*AliasSet] != -2) // If allocatable. - if (PhysRegsUsed[*AliasSet]) - spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); + return; + } + + // If the selected register aliases any other registers, we must make + // sure that one of the aliases isn't alive. + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register. + PhysRegsUsed[*AliasSet] == -2) // If allocatable. + continue; + + if (PhysRegsUsed[*AliasSet]) + spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); } } @@ -410,58 +415,63 @@ unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I, // First check to see if we have a free register of the requested type... unsigned PhysReg = NoFree ? 0 : getFreeReg(RC); + if (PhysReg != 0) { + // Assign the register. + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; + } + // If we didn't find an unused register, scavenge one now! - if (PhysReg == 0) { - assert(!PhysRegsUseOrder.empty() && "No allocated registers??"); - - // Loop over all of the preallocated registers from the least recently used - // to the most recently used. When we find one that is capable of holding - // our register, use it. 
- for (unsigned i = 0; PhysReg == 0; ++i) { - assert(i != PhysRegsUseOrder.size() && - "Couldn't find a register of the appropriate class!"); - - unsigned R = PhysRegsUseOrder[i]; - - // We can only use this register if it holds a virtual register (ie, it - // can be spilled). Do not use it if it is an explicitly allocated - // physical register! - assert(PhysRegsUsed[R] != -1 && - "PhysReg in PhysRegsUseOrder, but is not allocated?"); - if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) { - // If the current register is compatible, use it. - if (RC->contains(R)) { - PhysReg = R; - break; - } else { - // If one of the registers aliased to the current register is - // compatible, use it. - for (const unsigned *AliasIt = TRI->getAliasSet(R); - *AliasIt; ++AliasIt) { - if (RC->contains(*AliasIt) && - // If this is pinned down for some reason, don't use it. For - // example, if CL is pinned, and we run across CH, don't use - // CH as justification for using scavenging ECX (which will - // fail). - PhysRegsUsed[*AliasIt] != 0 && - - // Make sure the register is allocatable. Don't allocate SIL on - // x86-32. - PhysRegsUsed[*AliasIt] != -2) { - PhysReg = *AliasIt; // Take an aliased register - break; - } - } - } + assert(!PhysRegsUseOrder.empty() && "No allocated registers??"); + + // Loop over all of the preallocated registers from the least recently used + // to the most recently used. When we find one that is capable of holding + // our register, use it. + for (unsigned i = 0; PhysReg == 0; ++i) { + assert(i != PhysRegsUseOrder.size() && + "Couldn't find a register of the appropriate class!"); + + unsigned R = PhysRegsUseOrder[i]; + + // We can only use this register if it holds a virtual register (ie, it + // can be spilled). Do not use it if it is an explicitly allocated + // physical register! 
+ assert(PhysRegsUsed[R] != -1 && + "PhysReg in PhysRegsUseOrder, but is not allocated?"); + if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) { + // If the current register is compatible, use it. + if (RC->contains(R)) { + PhysReg = R; + break; + } + + // If one of the registers aliased to the current register is + // compatible, use it. + for (const unsigned *AliasIt = TRI->getAliasSet(R); + *AliasIt; ++AliasIt) { + if (!RC->contains(*AliasIt)) continue; + + // If this is pinned down for some reason, don't use it. For + // example, if CL is pinned, and we run across CH, don't use + // CH as justification for using scavenging ECX (which will + // fail). + if (PhysRegsUsed[*AliasIt] == 0) continue; + + // Make sure the register is allocatable. Don't allocate SIL on + // x86-32. + if (PhysRegsUsed[*AliasIt] == -2) continue; + + PhysReg = *AliasIt; // Take an aliased register + break; } } + } - assert(PhysReg && "Physical register not assigned!?!?"); + assert(PhysReg && "Physical register not assigned!?!?"); - // At this point PhysRegsUseOrder[i] is the least recently used register of - // compatible register class. Spill it to memory and reap its remains. - spillPhysReg(MBB, I, PhysReg); - } + // At this point PhysRegsUseOrder[i] is the least recently used register of + // compatible register class. Spill it to memory and reap its remains. + spillPhysReg(MBB, I, PhysReg); // Now that we know which register we need to assign this to, do it now! 
assignVirtToPhysReg(VirtReg, PhysReg); @@ -543,17 +553,17 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, } for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { - if (!ReloadedRegs.insert(*SubRegs)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - llvm_report_error(Msg.str()); + if (ReloadedRegs.insert(*SubRegs)) continue; + + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for invalid " + << "constraints:\n"; + MI->print(Msg, TM); } + llvm_report_error(Msg.str()); } return MI; @@ -563,7 +573,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, /// read/mod/write register, i.e. update partial register. static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); + MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && MO.isDef() && !MO.isDead()) return true; @@ -575,7 +585,7 @@ static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { /// read/mod/write register, i.e. update partial register. 
static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); + MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && !MO.isDef() && MO.isKill()) return true; @@ -606,7 +616,7 @@ static bool precedes(MachineBasicBlock::iterator A, /// ComputeLocalLiveness - Computes liveness of registers within a basic /// block, setting the killed/dead flags as appropriate. void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { - MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // Keep track of the most recently seen previous use or def of each reg, // so that we can update them with dead/kill markers. DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef; @@ -614,58 +624,60 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { I != E; ++I) { if (I->isDebugValue()) continue; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand& MO = I->getOperand(i); + MachineOperand &MO = I->getOperand(i); // Uses don't trigger any flags, but we need to save // them for later. Also, we have to process these // _before_ processing the defs, since an instr // uses regs before it defs them. 
- if (MO.isReg() && MO.getReg() && MO.isUse()) { - LastUseDef[MO.getReg()] = std::make_pair(I, i); - + if (!MO.isReg() || !MO.getReg() || !MO.isUse()) + continue; + + LastUseDef[MO.getReg()] = std::make_pair(I, i); + + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; + + const unsigned *Aliases = TRI->getAliasSet(MO.getReg()); + if (Aliases == 0) + continue; + + while (*Aliases) { + DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator + alias = LastUseDef.find(*Aliases); - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; + if (alias != LastUseDef.end() && alias->second.first != I) + LastUseDef[*Aliases] = std::make_pair(I, i); - const unsigned* Aliases = TRI->getAliasSet(MO.getReg()); - if (Aliases) { - while (*Aliases) { - DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator - alias = LastUseDef.find(*Aliases); - - if (alias != LastUseDef.end() && alias->second.first != I) - LastUseDef[*Aliases] = std::make_pair(I, i); - - ++Aliases; - } - } + ++Aliases; } } for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand& MO = I->getOperand(i); + MachineOperand &MO = I->getOperand(i); // Defs others than 2-addr redefs _do_ trigger flag changes: // - A def followed by a def is dead // - A use followed by a def is a kill - if (MO.isReg() && MO.getReg() && MO.isDef()) { - DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator - last = LastUseDef.find(MO.getReg()); - if (last != LastUseDef.end()) { - // Check if this is a two address instruction. If so, then - // the def does not kill the use. 
- if (last->second.first == I && - I->isRegTiedToUseOperand(i)) - continue; - - MachineOperand& lastUD = - last->second.first->getOperand(last->second.second); - if (lastUD.isDef()) - lastUD.setIsDead(true); - else - lastUD.setIsKill(true); - } + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; + + DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator + last = LastUseDef.find(MO.getReg()); + if (last != LastUseDef.end()) { + // Check if this is a two address instruction. If so, then + // the def does not kill the use. + if (last->second.first == I && + I->isRegTiedToUseOperand(i)) + continue; - LastUseDef[MO.getReg()] = std::make_pair(I, i); + MachineOperand &lastUD = + last->second.first->getOperand(last->second.second); + if (lastUD.isDef()) + lastUD.setIsDead(true); + else + lastUD.setIsKill(true); } + + LastUseDef[MO.getReg()] = std::make_pair(I, i); } } @@ -687,9 +699,9 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // in the block and determine if it is dead. for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) { - MachineInstr* MI = I->second.first; + MachineInstr *MI = I->second.first; unsigned idx = I->second.second; - MachineOperand& MO = MI->getOperand(idx); + MachineOperand &MO = MI->getOperand(idx); bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg()); @@ -712,20 +724,21 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // Two cases: // - used in another block // - used in the same block before it is defined (loop) - if (UI->getParent() != &MBB || - (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) { - if (UI->isDebugValue()) { - UsedByDebugValueOnly = true; - continue; - } - - // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. 
- UsedInMultipleBlocks.set(MO.getReg() - - TargetRegisterInfo::FirstVirtualRegister); - usedOutsideBlock = true; - UsedByDebugValueOnly = false; - break; + if (UI->getParent() == &MBB && + !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) + continue; + + if (UI->isDebugValue()) { + UsedByDebugValueOnly = true; + continue; } + + // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. + UsedInMultipleBlocks.set(MO.getReg() - + TargetRegisterInfo::FirstVirtualRegister); + usedOutsideBlock = true; + UsedByDebugValueOnly = false; + break; } if (UsedByDebugValueOnly) @@ -770,11 +783,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { AddToPhysRegsUseOrder(Reg); for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - AddToPhysRegsUseOrder(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } + if (PhysRegsUsed[*SubRegs] == -2) continue; + + AddToPhysRegsUseOrder(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); } } @@ -813,16 +826,16 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { SmallVector<unsigned, 8> Kills; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (MO.isReg() && MO.isKill()) { - if (!MO.isImplicit()) - Kills.push_back(MO.getReg()); - else if (!isReadModWriteImplicitKill(MI, MO.getReg())) - // These are extra physical register kills when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. 
- Kills.push_back(MO.getReg()); - } + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + + if (!MO.isImplicit()) + Kills.push_back(MO.getReg()); + else if (!isReadModWriteImplicitKill(MI, MO.getReg())) + // These are extra physical register kills when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + Kills.push_back(MO.getReg()); } // If any physical regs are earlyclobber, spill any value they might @@ -830,45 +843,45 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // If any virtual regs are earlyclobber, allocate them now (before // freeing inputs that are killed). if (MI->isInlineAsm()) { - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() && - MO.getReg()) { - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) - DestPhysReg = getReg(MBB, MI, DestVirtReg); - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = - std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - MO.setReg(DestPhysReg); // Assign the earlyclobber register - } else { - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. 
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(*SubRegs); - } - } + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || + !MO.getReg()) + continue; + + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = getReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = + std::make_pair((MachineInstr*)0, 0); + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); + MO.setReg(DestPhysReg); // Assign the earlyclobber register + } else { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. 
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(Reg); + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(*SubRegs); } } } @@ -894,7 +907,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // SmallSet<unsigned, 4> ReloadedRegs; for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& MO = MI->getOperand(i); + MachineOperand &MO = MI->getOperand(i); // here we are looking for only used operands (never def&use) if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) @@ -923,18 +936,18 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { "Silently clearing a virtual register?"); } - if (PhysReg) { - DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - DEBUG(dbgs() << " Last use of " - << TRI->getName(*SubRegs) << "[%reg" << VirtReg - <<"], removing it from live set\n"); - removePhysReg(*SubRegs); - } + if (!PhysReg) continue; + + DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"); + removePhysReg(PhysReg); + for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + DEBUG(dbgs() << " Last use of " + << TRI->getName(*SubRegs) << "[%reg" << VirtReg + <<"], removing it from live set\n"); + removePhysReg(*SubRegs); } 
} } @@ -942,30 +955,31 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // Loop over all of the operands of the instruction, spilling registers that // are defined, and marking explicit destinations in the PhysRegsUsed map. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() && - !MO.isEarlyClobber() && - TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(*SubRegs); - } - } + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || + MO.isEarlyClobber() || + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. 
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(Reg); + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + AddToPhysRegsUseOrder(*SubRegs); } } @@ -982,18 +996,18 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { MF->getRegInfo().setPhysRegUsed(Reg); for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - AddToPhysRegsUseOrder(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } + if (PhysRegsUsed[*SubRegs] == -2) continue; + + AddToPhysRegsUseOrder(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); } } } SmallVector<unsigned, 8> DeadDefs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); + MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDead()) DeadDefs.push_back(MO.getReg()); } @@ -1004,45 +1018,46 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // we need to scavenge a register. // for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef() && MO.getReg() && - !MO.isEarlyClobber() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { - // If this is a copy try to reuse the input as the output; - // that will make the copy go away. 
- // If this is a copy, the source reg is a phys reg, and - // that reg is available, use that phys reg for DestPhysReg. - // If this is a copy, the source reg is a virtual reg, and - // the phys reg that was assigned to that virtual reg is now - // available, use that phys reg for DestPhysReg. (If it's now - // available that means this was the last use of the source.) - if (isCopy && - TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && - isPhysRegAvailable(SrcCopyReg)) { - DestPhysReg = SrcCopyReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else if (isCopy && - TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && - SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && - MF->getRegInfo().getRegClass(DestVirtReg)-> - contains(SrcCopyPhysReg)) { - DestPhysReg = SrcCopyPhysReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else - DestPhysReg = getReg(MBB, MI, DestVirtReg); - } - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - MO.setReg(DestPhysReg); // Assign the output register + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || + MO.isEarlyClobber() || + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { + // If this is a copy try to reuse the input as the output; + // that will make the copy go away. + // If this is a copy, the source reg is a phys reg, and + // that reg is available, use that phys reg for DestPhysReg. + // If this is a copy, the source reg is a virtual reg, and + // the phys reg that was assigned to that virtual reg is now + // available, use that phys reg for DestPhysReg. 
(If it's now + // available that means this was the last use of the source.) + if (isCopy && + TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && + isPhysRegAvailable(SrcCopyReg)) { + DestPhysReg = SrcCopyReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else if (isCopy && + TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && + SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && + MF->getRegInfo().getRegClass(DestVirtReg)-> + contains(SrcCopyPhysReg)) { + DestPhysReg = SrcCopyPhysReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else + DestPhysReg = getReg(MBB, MI, DestVirtReg); } + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); + MO.setReg(DestPhysReg); // Assign the output register } // If this instruction defines any registers that are immediately dead, @@ -1059,21 +1074,20 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } else if (PhysRegsUsed[PhysReg] == -2) { // Unallocatable register dead, ignore. 
continue; - } - - if (PhysReg) { - DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] != -2) { - DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"); - removePhysReg(*AliasSet); - } + } else if (!PhysReg) + continue; + + DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it from live set\n"); + removePhysReg(PhysReg); + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"); + removePhysReg(*AliasSet); } } } @@ -1143,8 +1157,10 @@ bool RALocal::runOnMachineFunction(MachineFunction &Fn) { StackSlotForVirtReg.grow(LastVirtReg); Virt2PhysRegMap.grow(LastVirtReg); Virt2LastUseMap.grow(LastVirtReg); - VirtRegModified.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister); - UsedInMultipleBlocks.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister); + VirtRegModified.resize(LastVirtReg+1 - + TargetRegisterInfo::FirstVirtualRegister); + UsedInMultipleBlocks.resize(LastVirtReg+1 - + TargetRegisterInfo::FirstVirtualRegister); // Loop over all of the basic blocks, eliminating virtual register references for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index e532ade..ecc49e2 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -248,48 +248,47 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { unsigned DataLatency = SU->Latency; for (unsigned i = 
0, e = UseList.size(); i != e; ++i) { SUnit *UseSU = UseList[i]; - if (UseSU != SU) { - unsigned LDataLatency = DataLatency; - // Optionally add in a special extra latency for nodes that - // feed addresses. - // TODO: Do this for register aliases too. - // TODO: Perhaps we should get rid of - // SpecialAddressLatency and just move this into - // adjustSchedDependency for the targets that care about - // it. - if (SpecialAddressLatency != 0 && !UnitLatencies) { - MachineInstr *UseMI = UseSU->getInstr(); - const TargetInstrDesc &UseTID = UseMI->getDesc(); - int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); - assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); - if ((UseTID.mayLoad() || UseTID.mayStore()) && - (unsigned)RegUseIndex < UseTID.getNumOperands() && - UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) - LDataLatency += SpecialAddressLatency; - } - // Adjust the dependence latency using operand def/use - // information (if any), and then allow the target to - // perform its own adjustments. - const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, (SDep &)dep); - ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); - } - UseSU->addPred(dep); + if (UseSU == SU) + continue; + unsigned LDataLatency = DataLatency; + // Optionally add in a special extra latency for nodes that + // feed addresses. + // TODO: Do this for register aliases too. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about it. 
+ if (SpecialAddressLatency != 0 && !UnitLatencies) { + MachineInstr *UseMI = UseSU->getInstr(); + const TargetInstrDesc &UseTID = UseMI->getDesc(); + int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); + assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); + if ((UseTID.mayLoad() || UseTID.mayStore()) && + (unsigned)RegUseIndex < UseTID.getNumOperands() && + UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) + LDataLatency += SpecialAddressLatency; } + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. + const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, (SDep &)dep); + ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + } + UseSU->addPred(dep); } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector<SUnit *> &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { SUnit *UseSU = UseList[i]; - if (UseSU != SU) { - const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); - if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, (SDep &)dep); - ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); - } - UseSU->addPred(dep); + if (UseSU == SU) + continue; + const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, (SDep &)dep); + ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); } + UseSU->addPred(dep); } } @@ -528,7 +527,8 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, MachineInstr *DefMI = Def->getInstr(); int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); if (DefIdx != -1) { - int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx); + int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), + DefIdx); if (DefCycle >= 0) { MachineInstr *UseMI = Use->getInstr(); const unsigned UseClass = 
UseMI->getDesc().getSchedClass(); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aa283ad..a336e0a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5022,18 +5022,6 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - // Try to infer better alignment information than the load already has. - if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), - LD->getValueType(0), - Chain, Ptr, LD->getSrcValue(), - LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align); - } - } - // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). @@ -5099,6 +5087,18 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } } + // Try to infer better alignment information than the load already has. + if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { + if (Align > LD->getAlignment()) + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getSrcValue(), + LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->isVolatile(), LD->isNonTemporal(), Align); + } + } + if (CombinerAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -5250,17 +5250,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); - // Try to infer better alignment information than the store already has. 
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align); - } - } - // If this is a store of a bit convert, store the input value if the // resultant store does not need a higher alignment than the original. if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && @@ -5351,6 +5340,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // Try to infer better alignment information than the store already has. + if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { + if (Align > ST->getAlignment()) + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, + Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getMemoryVT(), + ST->isVolatile(), ST->isNonTemporal(), Align); + } + } + if (CombinerAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 3fc30ff..e4e9ef4 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -340,10 +340,9 @@ bool FastISel::SelectCall(User *I) { StaticAllocaMap.find(AI); if (SI == StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; - if (MMI) { - if (MDNode *Dbg = DI->getMetadata("dbg")) - MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg); - } + if (MDNode *Dbg = DI->getDbgMetadata()) + MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg); + // Building the map above is target independent. Generating DBG_VALUE // inline is target dependent; do this now. 
(void)TargetSelectInstruction(cast<Instruction>(I)); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 50f4c32..4fb2aa2 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -281,8 +281,17 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { GlobalVariable *llvm::ExtractTypeInfo(Value *V) { V = V->stripPointerCasts(); GlobalVariable *GV = dyn_cast<GlobalVariable>(V); - assert ((GV || isa<ConstantPointerNull>(V)) && - "TypeInfo must be a global variable or NULL"); + + if (GV && GV->getName() == ".llvm.eh.catch.all.value") { + assert(GV->hasInitializer() && + "The EH catch-all value must have an initializer"); + Value *Init = GV->getInitializer(); + GV = dyn_cast<GlobalVariable>(Init); + if (!GV) V = cast<ConstantPointerNull>(Init); + } + + assert((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); return GV; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index fda094d3..28ba343 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -264,7 +264,8 @@ void InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const TargetInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Flag && "Chain and flag operands should occur at end of operand list!"); @@ -295,7 +296,11 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, } } - MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef)); + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, + false/*isImp*/, false/*isKill*/, + false/*isDead*/, false/*isUndef*/, + false/*isEarlyClobber*/, + 0/*SubReg*/, IsDebug)); } /// AddOperand - Add the specified operand to 
the specified machine instr. II @@ -305,9 +310,10 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const TargetInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug) { if (Op.isMachineOpcode()) { - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug); } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { MI->addOperand(MachineOperand::CreateImm(C->getSExtValue())); } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { @@ -356,7 +362,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Flag && "Chain and flag operands should occur at end of operand list!"); - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug); } } @@ -498,164 +504,156 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } -/// EmitDbgValue - Generate any debug info that refers to this Node. Constant -/// dbg_value is not handled here. -void -InstrEmitter::EmitDbgValue(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap, - SDDbgValue *sd) { - if (!Node->getHasDebugValue()) - return; - if (!sd) - return; - assert(sd->getKind() == SDDbgValue::SDNODE); - unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap); - const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); - DebugLoc DL = sd->getDebugLoc(); - MachineInstr *MI; - if (VReg) { - MI = BuildMI(*MF, DL, II).addReg(VReg, RegState::Debug). - addImm(sd->getOffset()). - addMetadata(sd->getMDPtr()); - } else { - // Insert an Undef so we can see what we dropped. - MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()). 
- addMetadata(sd->getMDPtr()); - } - MBB->insert(InsertPos, MI); -} - -/// EmitDbgValue - Generate debug info that does not refer to a SDNode. -void -InstrEmitter::EmitDbgValue(SDDbgValue *sd, +/// EmitDbgValue - Generate machine instruction for a dbg_value node. +/// +MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD, + MachineBasicBlock *InsertBB, + DenseMap<SDValue, unsigned> &VRBaseMap, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { - if (!sd) - return; + uint64_t Offset = SD->getOffset(); + MDNode* MDPtr = SD->getMDPtr(); + DebugLoc DL = SD->getDebugLoc(); + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); - uint64_t Offset = sd->getOffset(); - MDNode* mdPtr = sd->getMDPtr(); - SDDbgValue::DbgValueKind kind = sd->getKind(); - DebugLoc DL = sd->getDebugLoc(); - MachineInstr* MI; - if (kind == SDDbgValue::CONST) { - Value *V = sd->getConst(); + MachineInstrBuilder MIB = BuildMI(*MF, DL, II); + if (SD->getKind() == SDDbgValue::SDNODE) { + AddOperand(&*MIB, SDValue(SD->getSDNode(), SD->getResNo()), + (*MIB).getNumOperands(), &II, VRBaseMap, true /*IsDebug*/); + } else if (SD->getKind() == SDDbgValue::CONST) { + Value *V = SD->getConst(); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()). - addImm(Offset).addMetadata(mdPtr); + MIB.addImm(CI->getSExtValue()); } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - MI = BuildMI(*MF, DL, II).addFPImm(CF). - addImm(Offset).addMetadata(mdPtr); + MIB.addFPImm(CF); } else { // Could be an Undef. In any case insert an Undef so we can see what we // dropped. - MI = BuildMI(*MF, DL, II).addReg(0U). - addImm(Offset).addMetadata(mdPtr); + MIB.addReg(0U); } - } else if (kind == SDDbgValue::FRAMEIX) { - unsigned FrameIx = sd->getFrameIx(); + } else if (SD->getKind() == SDDbgValue::FRAMEIX) { + unsigned FrameIx = SD->getFrameIx(); // Stack address; this needs to be lowered in target-dependent fashion. 
// FIXME test that the target supports this somehow; if not emit Undef. // Create a pseudo for EmitInstrWithCustomInserter's consumption. - MI = BuildMI(*MF, DL, II).addImm(FrameIx). - addImm(Offset).addMetadata(mdPtr); - MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM); - InsertPos = MBB->end(); - return; + MIB.addImm(FrameIx).addImm(Offset).addMetadata(MDPtr); + abort(); + TLI->EmitInstrWithCustomInserter(&*MIB, InsertBB, EM); + return 0; } else { // Insert an Undef so we can see what we dropped. - MI = BuildMI(*MF, DL, II).addReg(0U). - addImm(Offset).addMetadata(mdPtr); + MIB.addReg(0U); } - MBB->insert(InsertPos, MI); + + MIB.addImm(Offset).addMetadata(MDPtr); + return &*MIB; } -/// EmitNode - Generate machine code for a node and needed dependencies. +/// EmitMachineNode - Generate machine code for a target-specific node and +/// needed dependencies. /// -void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { - // If machine instruction - if (Node->isMachineOpcode()) { - unsigned Opc = Node->getMachineOpcode(); - - // Handle subreg insert/extract specially - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG) { - EmitSubregNode(Node, VRBaseMap); - return; - } +void InstrEmitter:: +EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap); + return; + } - // Handle COPY_TO_REGCLASS specially. - if (Opc == TargetOpcode::COPY_TO_REGCLASS) { - EmitCopyToRegClassNode(Node, VRBaseMap); - return; - } + // Handle COPY_TO_REGCLASS specially. 
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } - if (Opc == TargetOpcode::IMPLICIT_DEF) - // We want a unique VR for each IMPLICIT_DEF use. - return; - - const TargetInstrDesc &II = TII->get(Opc); - unsigned NumResults = CountResults(Node); - unsigned NodeOperands = CountOperands(Node); - bool HasPhysRegOuts = (NumResults > II.getNumDefs()) && - II.getImplicitDefs() != 0; + if (Opc == TargetOpcode::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. + return; + + const TargetInstrDesc &II = TII->get(Opc); + unsigned NumResults = CountResults(Node); + unsigned NodeOperands = CountOperands(Node); + bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG - unsigned NumMIOperands = NodeOperands + NumResults; - assert((II.getNumOperands() == NumMIOperands || - HasPhysRegOuts || II.isVariadic()) && - "#operands for dag node doesn't match .td file!"); + unsigned NumMIOperands = NodeOperands + NumResults; + if (II.isVariadic()) + assert(NumMIOperands >= II.getNumOperands() && + "Too few operands for a variadic node!"); + else + assert(NumMIOperands >= II.getNumOperands() && + NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() && + "#operands for dag node doesn't match .td file!"); #endif - // Create the new machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); - - // Add result register values for things that are defined by this - // instruction. - if (NumResults) - CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); - - // Emit all of the actual operands of this instruction, adding them to the - // instruction as appropriate. - bool HasOptPRefs = II.getNumDefs() > NumResults; - assert((!HasOptPRefs || !HasPhysRegOuts) && - "Unable to cope with optional defs and phys regs defs!"); - unsigned NumSkip = HasOptPRefs ? 
II.getNumDefs() - NumResults : 0; - for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, - VRBaseMap); - - // Transfer all of the memory reference descriptions of this instruction. - MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), - cast<MachineSDNode>(Node)->memoperands_end()); - - if (II.usesCustomInsertionHook()) { - // Insert this instruction into the basic block using a target - // specific inserter which may returns a new basic block. - MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM); - InsertPos = MBB->end(); - } else { - MBB->insert(InsertPos, MI); - } + // Create the new machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + + // Add result register values for things that are defined by this + // instruction. + if (NumResults) + CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); + + // Emit all of the actual operands of this instruction, adding them to the + // instruction as appropriate. + bool HasOptPRefs = II.getNumDefs() > NumResults; + assert((!HasOptPRefs || !HasPhysRegOuts) && + "Unable to cope with optional defs and phys regs defs!"); + unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + for (unsigned i = NumSkip; i != NodeOperands; ++i) + AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + VRBaseMap); + + // Transfer all of the memory reference descriptions of this instruction. + MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), + cast<MachineSDNode>(Node)->memoperands_end()); + + if (II.usesCustomInsertionHook()) { + // Insert this instruction into the basic block using a target + // specific inserter which may returns a new basic block. + MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM); + InsertPos = MBB->end(); + return; + } + + MBB->insert(InsertPos, MI); - // Additional results must be an physical register def. 
- if (HasPhysRegOuts) { - for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; - if (Node->hasAnyUseOfValue(i)) - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); - // If there are no uses, mark the register as dead now, so that - // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Flag value, for the benefit of targets still using - // Flag for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) - MI->addRegisterDead(Reg, TRI); - } + // Additional results must be an physical register def. + if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + // If there are no uses, mark the register as dead now, so that + // MachineLICM/Sink can see that it's dead. Don't do this if the + // node has a Flag value, for the benefit of targets still using + // Flag for values in physregs. + else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) + MI->addRegisterDead(Reg, TRI); } - return; } + + // If the instruction has implicit defs and the node doesn't, mark the + // implicit def as dead. If the node has any flag outputs, we don't do this + // because we don't know what implicit defs are being used by flagged nodes. + if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) + if (const unsigned *IDList = II.getImplicitDefs()) { + for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); + i != e; ++i) + MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + } + return; +} +/// EmitSpecialNode - Generate machine code for a target-independent node and +/// needed dependencies. 
+void InstrEmitter:: +EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index eefcd73..baabb75 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -64,7 +64,8 @@ class InstrEmitter { void AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const TargetInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug = false); /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the @@ -73,7 +74,8 @@ class InstrEmitter { void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const TargetInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug = false); /// EmitSubregNode - Generate machine code for subreg nodes. /// @@ -98,22 +100,23 @@ public: /// MachineInstr. static unsigned CountOperands(SDNode *Node); - /// EmitDbgValue - Generate any debug info that refers to this Node. Constant - /// dbg_value is not handled here. - void EmitDbgValue(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap, - SDDbgValue* sd); - - - /// EmitDbgValue - Generate a constant DBG_VALUE. No node is involved. - void EmitDbgValue(SDDbgValue* sd, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + /// EmitDbgValue - Generate machine instruction for a dbg_value node. + /// + MachineInstr *EmitDbgValue(SDDbgValue *SD, + MachineBasicBlock *InsertBB, + DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + if (Node->isMachineOpcode()) + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap, EM); + else + EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); + } /// getBlock - Return the current basic block. MachineBasicBlock *getBlock() { return MBB; } @@ -124,6 +127,13 @@ public: /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); + +private: + void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); }; } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index dbbd753..7638ea2 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -47,10 +47,12 @@ private: uint64_t Offset; DebugLoc DL; unsigned Order; + bool Invalid; public: // Constructor for non-constants. SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl, - unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O) { + unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; @@ -58,14 +60,14 @@ public: // Constructor for constants. SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), Offset(off), DL(dl), Order(O) { + mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { kind = CONST; u.Const = C; } // Constructor for frame indices. 
SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), Offset(off), DL(dl), Order(O) { + mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { kind = FRAMEIX; u.FrameIx = FI; } @@ -97,6 +99,12 @@ public: // Returns the SDNodeOrder. This is the order of the preceding node in the // input. unsigned getOrder() { return Order; } + + // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + // property. A SDDbgValue is invalid if the SDNode that produces the value is + // deleted. + void setIsInvalidated() { Invalid = true; } + bool isInvalidated() { return Invalid; } }; } // end llvm namespace diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index c13565a..e7ab2f0 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -23,6 +23,7 @@ #include "llvm/Target/TargetSubtarget.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" @@ -407,19 +408,65 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { } } +namespace { + struct OrderSorter { + bool operator()(const std::pair<unsigned, MachineInstr*> &A, + const std::pair<unsigned, MachineInstr*> &B) { + return A.first < B.first; + } + }; +} + +// ProcessSourceNode - Process nodes with source order numbers. These are added +// to a vector which EmitSchedule use to determine how to insert dbg_value +// instructions in the right order. 
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, + InstrEmitter &Emitter, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, + SmallSet<unsigned, 8> &Seen) { + unsigned Order = DAG->GetOrdering(N); + if (!Order || !Seen.insert(Order)) + return; + + MachineBasicBlock *BB = Emitter.getBlock(); + if (BB->empty() || BB->back().isPHI()) { + // Did not insert any instruction. + Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); + return; + } + + Orders.push_back(std::make_pair(Order, &BB->back())); + if (!N->getHasDebugValue()) + return; + // Opportunistically insert immediate dbg_value uses, i.e. those with source + // order number right after the N. + MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); + SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N); + for (unsigned i = 0, e = DVs.size(); i != e; ++i) { + if (DVs[i]->isInvalidated()) + continue; + unsigned DVOrder = DVs[i]->getOrder(); + if (DVOrder == ++Order) { + MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], BB, VRBaseMap, EM); + Orders.push_back(std::make_pair(DVOrder, DbgMI)); + BB->insert(InsertPos, DbgMI); + DVs[i]->setIsInvalidated(); + } + } +} + + /// EmitSchedule - Emit the machine code in scheduled order. MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; - - // For now, any constant debug info nodes go at the beginning. 
- for (SDDbgInfo::ConstDbgIterator I = DAG->DbgConstBegin(), - E = DAG->DbgConstEnd(); I!=E; I++) { - Emitter.EmitDbgValue(*I, EM); - delete *I; - } + SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; + SmallSet<unsigned, 8> Seen; + bool HasDbg = DAG->hasDebugValues(); for (unsigned i = 0, e = Sequence.size(); i != e; i++) { SUnit *SU = Sequence[i]; @@ -442,22 +489,80 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { N = N->getFlaggedNode()) FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { + SDNode *N = FlaggedNodes.back(); Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, VRBaseMap, EM); - if (FlaggedNodes.back()->getHasDebugValue()) - if (SDDbgValue *sd = DAG->GetDbgInfo(FlaggedNodes.back())) { - Emitter.EmitDbgValue(FlaggedNodes.back(), VRBaseMap, sd); - delete sd; - } + // Remember the the source order of the inserted instruction. + if (HasDbg) + ProcessSourceNode(N, DAG, Emitter, EM, VRBaseMap, Orders, Seen); FlaggedNodes.pop_back(); } Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap, EM); - if (SU->getNode()->getHasDebugValue()) - if (SDDbgValue *sd = DAG->GetDbgInfo(SU->getNode())) { - Emitter.EmitDbgValue(SU->getNode(), VRBaseMap, sd); - delete sd; + // Remember the the source order of the inserted instruction. + if (HasDbg) + ProcessSourceNode(SU->getNode(), DAG, Emitter, EM, VRBaseMap, Orders, + Seen); + } + + // Insert all the dbg_value which have not already been inserted in source + // order sequence. + if (HasDbg) { + MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin(); + while (BBBegin != BB->end() && BBBegin->isPHI()) + ++BBBegin; + + // Sort the source order instructions and use the order to insert debug + // values. + std::sort(Orders.begin(), Orders.end(), OrderSorter()); + + SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); + SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); + // Now emit the rest according to source order. 
+ unsigned LastOrder = 0; + MachineInstr *LastMI = 0; + for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { + unsigned Order = Orders[i].first; + MachineInstr *MI = Orders[i].second; + // Insert all SDDbgValue's whose order(s) are before "Order". + if (!MI) + continue; + MachineBasicBlock *MIBB = MI->getParent(); +#ifndef NDEBUG + unsigned LastDIOrder = 0; +#endif + for (; DI != DE && + (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { +#ifndef NDEBUG + assert((*DI)->getOrder() >= LastDIOrder && + "SDDbgValue nodes must be in source order!"); + LastDIOrder = (*DI)->getOrder(); +#endif + if ((*DI)->isInvalidated()) + continue; + MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, MIBB, VRBaseMap, EM); + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + MachineBasicBlock::iterator Pos = MI; + MIBB->insert(llvm::next(Pos), DbgMI); + } } + LastOrder = Order; + LastMI = MI; + } + // Add trailing DbgValue's before the terminator. FIXME: May want to add + // some of them before one or more conditional branches? + while (DI != DE) { + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); + if (!(*DI)->isInvalidated()) { + MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, InsertBB, VRBaseMap, EM); + InsertBB->insert(Pos, DbgMI); + } + ++DI; + } } BB = Emitter.getBlock(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ed9146d..0ba65ab 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -598,8 +598,10 @@ void SelectionDAG::DeallocateNode(SDNode *N) { // Remove the ordering of this node. Ordering->remove(N); - // And its entry in the debug info table, if any. - DbgInfo->remove(N); + // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. 
+ SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N); + for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) + DbgVals[i]->setIsInvalidated(); } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -811,6 +813,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, SelectionDAG::~SelectionDAG() { allnodes_clear(); delete Ordering; + DbgInfo->clear(); delete DbgInfo; } @@ -839,6 +842,7 @@ void SelectionDAG::clear() { Root = getEntryNode(); delete Ordering; Ordering = new SDNodeOrdering(); + DbgInfo->clear(); delete DbgInfo; DbgInfo = new SDDbgInfo(); } @@ -3128,11 +3132,17 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, if (Str.empty()) { if (VT.isInteger()) return DAG.getConstant(0, VT); - unsigned NumElts = VT.getVectorNumElements(); - MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - DAG.getConstant(0, - EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts))); + else if (VT.getSimpleVT().SimpleTy == MVT::f32 || + VT.getSimpleVT().SimpleTy == MVT::f64) + return DAG.getConstantFP(0.0, VT); + else if (VT.isVector()) { + unsigned NumElts = VT.getVectorNumElements(); + MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(), + EltVT, NumElts))); + } else + llvm_unreachable("Expected type!"); } assert(!VT.isVector() && "Can't handle vector type here!"); @@ -3180,51 +3190,33 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { return false; } -/// MeetsMaxMemopRequirement - Determines if the number of memory ops required -/// to replace the memset / memcpy is below the threshold. It also returns the -/// types of the sequence of memory ops to perform memset / memcpy. 
-static -bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, - SDValue Dst, SDValue Src, - unsigned Limit, uint64_t Size, unsigned &Align, - std::string &Str, bool &isSrcStr, - SelectionDAG &DAG, - const TargetLowering &TLI) { - isSrcStr = isMemSrcFromString(Src, Str); - bool isSrcConst = isa<ConstantSDNode>(Src); - EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); - bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT); - if (VT != MVT::Other) { - const Type *Ty = VT.getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); - // If source is a string constant, this will require an unaligned load. - if (NewAlign > Align && (isSrcConst || AllowUnalign)) { - if (Dst.getOpcode() != ISD::FrameIndex) { - // Can't change destination alignment. It requires a unaligned store. - if (AllowUnalign) - VT = MVT::Other; - } else { - int FI = cast<FrameIndexSDNode>(Dst)->getIndex(); - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - if (MFI->isFixedObjectIndex(FI)) { - // Can't change destination alignment. It requires a unaligned store. - if (AllowUnalign) - VT = MVT::Other; - } else { - // Give the stack frame object a larger alignment if needed. - if (MFI->getObjectAlignment(FI) < NewAlign) - MFI->setObjectAlignment(FI, NewAlign); - Align = NewAlign; - } - } - } - } +/// FindOptimalMemOpLowering - Determines the optimial series memory ops +/// to replace the memset / memcpy. Return true if the number of memory ops +/// is below the threshold. It returns the types of the sequence of +/// memory ops to perform memset / memcpy by reference. 
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, + unsigned Limit, uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool SafeToUseFP, + SelectionDAG &DAG, + const TargetLowering &TLI) { + assert((SrcAlign == 0 || SrcAlign >= DstAlign) && + "Expecting memcpy / memset source to meet alignment requirement!"); + // If 'SrcAlign' is zero, that means the memory operation does not need load + // the value, i.e. memset or memcpy from constant string. Otherwise, it's + // the inferred alignment of the source. 'DstAlign', on the other hand, is the + // specified alignment of the memory operation. If it is zero, that means + // it's possible to change the alignment of the destination. + EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, SafeToUseFP, DAG); if (VT == MVT::Other) { - if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) { + VT = TLI.getPointerTy(); + const Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + if (DstAlign >= TLI.getTargetData()->getABITypeAlignment(Ty) || + TLI.allowsUnalignedMemoryAccesses(VT)) { VT = MVT::i64; } else { - switch (Align & 7) { + switch (DstAlign & 7) { case 0: VT = MVT::i64; break; case 4: VT = MVT::i32; break; case 2: VT = MVT::i16; break; @@ -3246,7 +3238,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { // For now, only use non-vector load / store's for the left-over pieces. 
- if (VT.isVector()) { + if (VT.isVector() || VT.isFloatingPoint()) { VT = MVT::i64; while (!TLI.isTypeLegal(VT)) VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); @@ -3269,11 +3261,11 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, } static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff){ + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Expand memcpy to a series of load and store ops if the size operand falls @@ -3282,15 +3274,33 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, uint64_t Limit = -1ULL; if (!AlwaysInline) Limit = TLI.getMaxStoresPerMemcpy(); - unsigned DstAlign = Align; // Destination alignment can change. + bool DstAlignCanChange = false; + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; std::string Str; - bool CopyFromStr; - if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign, - Str, CopyFromStr, DAG, TLI)) + bool CopyFromStr = isMemSrcFromString(Src, Str); + bool isZeroStr = CopyFromStr && Str.empty(); + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), + (isZeroStr ? 
0 : SrcAlign), true, DAG, TLI)) return SDValue(); + if (DstAlignCanChange) { + const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } - bool isZeroStr = CopyFromStr && Str.empty(); SmallVector<SDValue, 8> OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; @@ -3299,16 +3309,17 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; - if (CopyFromStr && (isZeroStr || !VT.isVector())) { + if (CopyFromStr && + (isZeroStr || (VT.isInteger() && !VT.isVector()))) { // It's unlikely a store of a vector immediate can be done in a single // instruction. It would require a load from a constantpool first. - // We also handle store a vector with all zero's. + // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, Align); } else { // The type might not be legal for the target. 
This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3319,11 +3330,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, false, false, Align); + SrcSV, SrcSVOff + SrcOff, VT, false, false, + MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), DstSV, DstSVOff + DstOff, VT, false, false, - DstAlign); + Align); } OutChains.push_back(Store); SrcOff += VTSize; @@ -3335,11 +3347,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, } static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff){ + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align,bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Expand memmove to a series of load and store ops if the size operand falls @@ -3348,15 +3360,32 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, uint64_t Limit = -1ULL; if (!AlwaysInline) Limit = TLI.getMaxStoresPerMemmove(); - unsigned DstAlign = Align; // Destination alignment can change. 
- std::string Str; - bool CopyFromStr; - if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign, - Str, CopyFromStr, DAG, TLI)) + bool DstAlignCanChange = false; + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; + + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), + SrcAlign, true, DAG, TLI)) return SDValue(); - uint64_t SrcOff = 0, DstOff = 0; + if (DstAlignCanChange) { + const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + uint64_t SrcOff = 0, DstOff = 0; SmallVector<SDValue, 8> LoadValues; SmallVector<SDValue, 8> LoadChains; SmallVector<SDValue, 8> OutChains; @@ -3368,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, false, false, Align); + SrcSV, SrcSVOff + SrcOff, false, false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3383,7 +3412,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, Align); OutChains.push_back(Store); DstOff += VTSize; } @@ -3393,24 +3422,40 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, } static SDValue 
getMemsetStores(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, - const Value *DstSV, uint64_t DstSVOff) { + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, + const Value *DstSV, uint64_t DstSVOff) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Expand memset to a series of load/store ops if the size operand // falls below a certain threshold. std::vector<EVT> MemOps; - std::string Str; - bool CopyFromStr; - if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(), - Size, Align, Str, CopyFromStr, DAG, TLI)) + bool DstAlignCanChange = false; + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + bool IsZero = isa<ConstantSDNode>(Src) && + cast<ConstantSDNode>(Src)->isNullValue(); + if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), + Size, (DstAlignCanChange ? 0 : Align), 0, + IsZero, DAG, TLI)) return SDValue(); + if (DstAlignCanChange) { + const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. 
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + SmallVector<SDValue, 8> OutChains; uint64_t DstOff = 0; - unsigned NumMemOps = MemOps.size(); for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; @@ -3441,10 +3486,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = - getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), - Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(),Align, + false, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) return Result; } @@ -4846,6 +4890,26 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, return NULL; } +/// getDbgValue - Creates a SDDbgValue node. +/// +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O); +} + +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, Value *C, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); +} + +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); +} + namespace { /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node @@ -5241,24 +5305,12 @@ unsigned SelectionDAG::GetOrdering(const SDNode *SD) const { return Ordering->getOrder(SD); } -/// AssignDbgInfo - Assign debug info to the SDNode. 
-void SelectionDAG::AssignDbgInfo(SDNode* SD, SDDbgValue* db) { - assert(SD && "Trying to assign dbg info to a null node!"); - DbgInfo->add(SD, db); - SD->setHasDebugValue(true); -} - -/// RememberDbgInfo - Remember debug info which is not assigned to an SDNode. -void SelectionDAG::RememberDbgInfo(SDDbgValue* db) { - DbgInfo->add(db); -} - -/// GetDbgInfo - Get the debug info, if any, for the SDNode. -SDDbgValue* SelectionDAG::GetDbgInfo(const SDNode *SD) { - assert(SD && "Trying to get the order of a null node!"); - if (SD->getHasDebugValue()) - return DbgInfo->getSDDbgValue(SD); - return 0; +/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the +/// value is produced by SD. +void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD) { + DbgInfo->add(DB, SD); + if (SD) + SD->setHasDebugValue(true); } //===----------------------------------------------------------------------===// @@ -6094,8 +6146,20 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. GlobalValue *GV; int64_t GVOffset = 0; - if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) - return MinAlign(GV->getAlignment(), GVOffset); + if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + // If GV has specified alignment, then use it. Otherwise, use the preferred + // alignment. + unsigned Align = GV->getAlignment(); + if (!Align) { + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + if (GVar->hasInitializer()) { + const TargetData *TD = TLI.getTargetData(); + Align = TD->getPreferredAlignment(GVar); + } + } + } + return MinAlign(Align, GVOffset); + } // If this is a direct reference to a stack slot, use information about the // stack slot's alignment. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 12096b9..922c6e8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3800,7 +3800,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { int FI = SI->second; if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) - if (MDNode *Dbg = DI.getMetadata("dbg")) + if (MDNode *Dbg = DI.getDbgMetadata()) MMI->setVariableDbgInfo(Variable, FI, Dbg); return 0; } @@ -3824,22 +3824,19 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // debug info exists. ++SDNodeOrder; if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { - SDDbgValue* dv = new SDDbgValue(Variable, V, Offset, dl, SDNodeOrder); - DAG.RememberDbgInfo(dv); + DAG.AddDbgValue(DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder)); } else { SDValue &N = NodeMap[V]; - if (N.getNode()) { - SDDbgValue *dv = new SDDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder); - DAG.AssignDbgInfo(N.getNode(), dv); - } else { + if (N.getNode()) + DAG.AddDbgValue(DAG.getDbgValue(Variable, N.getNode(), + N.getResNo(), Offset, dl, SDNodeOrder), + N.getNode()); + else // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter; we need this fallback. - SDDbgValue* dv = new SDDbgValue(Variable, + DAG.AddDbgValue(DAG.getDbgValue(Variable, UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder); - DAG.RememberDbgInfo(dv); - } + Offset, dl, SDNodeOrder)); } // Build a debug info table entry. @@ -3855,7 +3852,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; // VLAs. 
int FI = SI->second; if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) - if (MDNode *Dbg = DI.getMetadata("dbg")) + if (MDNode *Dbg = DI.getDbgMetadata()) MMI->setVariableDbgInfo(Variable, FI, Dbg); return 0; } @@ -6054,8 +6051,10 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { } if (!I->use_empty()) { - SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, - SDB->getCurDebugLoc()); + SDValue Res; + if (!ArgValues.empty()) + Res = DAG.getMergeValues(&ArgValues[0], NumValues, + SDB->getCurDebugLoc()); SDB->setValue(I, Res); // If this argument is live outside of the entry block, insert a copy from diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index cbbe431..ea96b21 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -61,6 +61,7 @@ using namespace llvm; STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, @@ -365,23 +366,23 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { /// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is /// attached with this instruction. 
-static void SetDebugLoc(unsigned MDDbgKind, Instruction *I, - SelectionDAGBuilder *SDB, +static void SetDebugLoc(Instruction *I, SelectionDAGBuilder *SDB, FastISel *FastIS, MachineFunction *MF) { - if (MDNode *Dbg = I->getMetadata(MDDbgKind)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); + MDNode *Dbg = I->getDbgMetadata(); + if (Dbg == 0) return; + + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); - SDB->setCurDebugLoc(Loc); + SDB->setCurDebugLoc(Loc); - if (FastIS) - FastIS->setCurDebugLoc(Loc); + if (FastIS) + FastIS->setCurDebugLoc(Loc); - // If the function doesn't have a default debug location yet, set - // it. This is kind of a hack. - if (MF->getDefaultDebugLoc().isUnknown()) - MF->setDefaultDebugLoc(Loc); - } + // If the function doesn't have a default debug location yet, set + // it. This is kind of a hack. + if (MF->getDefaultDebugLoc().isUnknown()) + MF->setDefaultDebugLoc(Loc); } /// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown. @@ -396,12 +397,11 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, BasicBlock::iterator End, bool &HadTailCall) { SDB->setCurrentBasicBlock(BB); - unsigned MDDbgKind = LLVMBB->getContext().getMDKindID("dbg"); // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { - SetDebugLoc(MDDbgKind, I, SDB, 0, MF); + SetDebugLoc(I, SDB, 0, MF); if (!isa<TerminatorInst>(I)) { SDB->visit(*I); @@ -424,7 +424,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, HandlePHINodesInSuccessorBlocks(LLVMBB); // Lower the terminator after the copies are emitted. 
- SetDebugLoc(MDDbgKind, LLVMBB->getTerminator(), SDB, 0, MF); + SetDebugLoc(LLVMBB->getTerminator(), SDB, 0, MF); SDB->visit(*LLVMBB->getTerminator()); ResetDebugLoc(SDB, 0); } @@ -842,9 +842,6 @@ void SelectionDAGISel::DoInstructionSelection() { DEBUG(errs() << "===== Instruction selection ends:\n"); PostprocessISelDAG(); - - // FIXME: This shouldn't be needed, remove it. - CurDAG->RemoveDeadNodes(); } @@ -865,8 +862,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, #endif ); - unsigned MDDbgKind = Fn.getContext().getMDKindID("dbg"); - // Iterate over all basic blocks in the function. for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { BasicBlock *LLVMBB = &*I; @@ -964,7 +959,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, break; } - SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF); + SetDebugLoc(BI, SDB, FastIS, &MF); // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(BI)) { @@ -1592,8 +1587,9 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); - // If the node became dead, delete it. - if (ChainNode->use_empty()) + // If the node became dead and we haven't already seen it, delete it. + if (ChainNode->use_empty() && + !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) NowDeadNodes.push_back(ChainNode); } } @@ -1614,8 +1610,9 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), InputFlag, &ISU); - // If the node became dead, delete it. - if (FRN->use_empty()) + // If the node became dead and we haven't already seen it, delete it. 
+ if (FRN->use_empty() && + !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN)) NowDeadNodes.push_back(FRN); } } @@ -1810,9 +1807,9 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be // adding a chain) and the input could have flags and chains as well. - // In this case we need to shifting the operands down. + // In this case we need to shift the operands down. // FIXME: This is a horrible hack and broken in obscure cases, no worse - // than the old isel though. We should sink this into MorphNodeTo. + // than the old isel though. int OldFlagResultNo = -1, OldChainResultNo = -1; unsigned NTMNumResults = Node->getNumValues(); @@ -1888,7 +1885,9 @@ CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, ALWAYS_INLINE static bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { - return N->getOpcode() == MatcherTable[MatcherIndex++]; + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + return N->getOpcode() == Opc; } ALWAYS_INLINE static bool @@ -2142,7 +2141,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Get the opcode, add the index to the table. - unsigned Opc = MatcherTable[Idx++]; + uint16_t Opc = MatcherTable[Idx++]; + Opc |= (unsigned short)MatcherTable[Idx++] << 8; if (Opc >= OpcodeOffset.size()) OpcodeOffset.resize((Opc+1)*2); OpcodeOffset[Opc] = Idx; @@ -2181,6 +2181,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, FailIndex = MatcherIndex+NumToSkip; + unsigned MatcherIndexOfPredicate = MatcherIndex; + (void)MatcherIndexOfPredicate; // silence warning. + // If we can't evaluate this predicate without pushing a scope (e.g. 
if // it is a 'MoveParent') or if the predicate succeeds on this node, we // push the scope and evaluate the full predicate chain. @@ -2190,9 +2193,10 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!Result) break; - DEBUG(errs() << " Skipped scope entry at index " << MatcherIndex - << " continuing at " << FailIndex << "\n"); - + DEBUG(errs() << " Skipped scope entry (due to false predicate) at " + << "index " << MatcherIndexOfPredicate + << ", continuing at " << FailIndex << "\n"); + ++NumDAGIselRetries; // Otherwise, we know that this case of the Scope is guaranteed to fail, // move to the next case. @@ -2298,8 +2302,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); if (CaseSize == 0) break; + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + // If the opcode matches, then we will execute this case. - if (CurNodeOpcode == MatcherTable[MatcherIndex++]) + if (CurNodeOpcode == Opc) break; // Otherwise, skip over this case. @@ -2428,6 +2435,35 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; } + case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 + case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 + // These are space-optimized forms of OPC_EmitMergeInputChains. + assert(InputChain.getNode() == 0 && + "EmitMergeInputChains should be the first chain producing node"); + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? 
+ if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (InputChain.getNode() == 0) + break; // Failed to merge. + continue; + } + case OPC_EmitMergeInputChains: { assert(InputChain.getNode() == 0 && "EmitMergeInputChains should be the first chain producing node"); @@ -2646,14 +2682,10 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); SDValue Res = RecordedNodes[ResSlot]; - // FIXME2: Eliminate this horrible hack by fixing the 'Gen' program - // after (parallel) on input patterns are removed. This would also - // allow us to stop encoding #results in OPC_CompleteMatch's table - // entry. - if (NodeToMatch->getNumValues() <= i || - NodeToMatch->getValueType(i) == MVT::Other || - NodeToMatch->getValueType(i) == MVT::Flag) - break; + assert(i < NodeToMatch->getNumValues() && + NodeToMatch->getValueType(i) != MVT::Other && + NodeToMatch->getValueType(i) != MVT::Flag && + "Invalid number of results to complete!"); assert((NodeToMatch->getValueType(i) == Res.getValueType() || NodeToMatch->getValueType(i) == MVT::iPTR || Res.getValueType() == MVT::iPTR || @@ -2685,6 +2717,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. 
DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + ++NumDAGIselRetries; while (1) { if (MatchScopes.empty()) { CannotYetSelect(NodeToMatch); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f7ef2d6..ea2ff2f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -186,11 +186,13 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; - Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8"; - Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; + Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; @@ -200,11 +202,13 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; - Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8"; - Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; + Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; Names[RTLIB::FPTOUINT_F64_I128] = 
"__fixunsdfti"; @@ -314,6 +318,10 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (RetVT == MVT::i128) return FPTOSINT_F32_I128; } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOSINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F64_I16; if (RetVT == MVT::i32) return FPTOSINT_F64_I32; if (RetVT == MVT::i64) @@ -353,6 +361,10 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (RetVT == MVT::i128) return FPTOUINT_F32_I128; } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOUINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F64_I16; if (RetVT == MVT::i32) return FPTOUINT_F64_I32; if (RetVT == MVT::i64) @@ -475,7 +487,6 @@ TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) memset(LoadExtActions, 0, sizeof(LoadExtActions)); memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); - memset(ConvertActions, 0, sizeof(ConvertActions)); memset(CondCodeActions, 0, sizeof(CondCodeActions)); // Set default actions for various operations. diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 97e858f..15ca374 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -59,11 +59,6 @@ DisableCrossClassJoin("disable-cross-class-join", cl::desc("Avoid coalescing cross register class copies"), cl::init(false), cl::Hidden); -static cl::opt<bool> -PhysJoinTweak("tweak-phys-join-heuristics", - cl::desc("Tweak heuristics for joining phys reg with vr"), - cl::init(false), cl::Hidden); - static RegisterPass<SimpleRegisterCoalescing> X("simple-register-coalescing", "Simple Register Coalescing"); @@ -1445,7 +1440,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); const TargetRegisterClass *DstRC= DstIsPhys ? 
0 : mri_->getRegClass(DstReg); const TargetRegisterClass *NewRC = NULL; - MachineBasicBlock *CopyMBB = CopyMI->getParent(); unsigned RealDstReg = 0; unsigned RealSrcReg = 0; if (isExtSubReg || isInsSubReg || isSubRegToReg) { @@ -1646,8 +1640,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { else if (RealSrcReg) SavedLI.reset(li_->dupInterval(&DstInt)); - // Check if it is necessary to propagate "isDead" property. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) { + // Check if it is necessary to propagate "isDead" property. MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false); bool isDead = mopd->isDead(); @@ -1656,60 +1650,42 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // these are not spillable! If the destination interval uses are far away, // think twice about coalescing them! if (!isDead && (SrcIsPhys || DstIsPhys)) { - // If the copy is in a loop, take care not to coalesce aggressively if the - // src is coming in from outside the loop (or the dst is out of the loop). - // If it's not in a loop, then determine whether to join them base purely - // by the length of the interval. - if (PhysJoinTweak) { - if (SrcIsPhys) { - if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { - mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } else { - if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { - mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - } else { - // If the virtual register live interval is long but it has low use - // density, do not join them, instead mark the physical register as its - // allocation preference. 
- LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; - LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt; - unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg; - unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. - if (JoinPInt.ranges.size() > 1000) { - mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); - ++numAborts; - DEBUG(dbgs() << "\tPhysical register too complicated, abort!\n"); - return false; - } + // If the virtual register live interval is long but it has low use + // density, do not join them, instead mark the physical register as its + // allocation preference. + LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; + LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt; + unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg; + unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (JoinPInt.ranges.size() > 1000) { + mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } - const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - float Ratio = 1.0 / Threshold; - if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(JoinVReg), - mri_->use_nodbg_end()) / Length) < Ratio)) { - mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. 
- return false; - } + const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + float Ratio = 1.0 / Threshold; + if (Length > Threshold && + (((float)std::distance(mri_->use_nodbg_begin(JoinVReg), + mri_->use_nodbg_end()) / Length) < Ratio)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) + return true; + + mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + Again = true; // May be possible to coalesce later. + return false; } } } @@ -1720,16 +1696,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // been modified, so we can use this information below to update aliases. bool Swapped = false; // If SrcInt is implicitly defined, it's safe to coalesce. - bool isEmpty = SrcInt.empty(); - if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) { - // Only coalesce an empty interval (defined by implicit_def) with - // another interval which has a valno defined by the CopyMI and the CopyMI - // is a kill of the implicit def. - DEBUG(dbgs() << "Not profitable!\n"); - return false; - } - - if (!isEmpty && !JoinIntervals(DstInt, SrcInt, Swapped)) { + if (SrcInt.empty()) { + if (!CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) { + // Only coalesce an empty interval (defined by implicit_def) with + // another interval which has a valno defined by the CopyMI and the CopyMI + // is a kill of the implicit def. + DEBUG(dbgs() << "Not profitable!\n"); + return false; + } + } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) { // Coalescing failed. 
// If definition of source is defined by trivial computation, try @@ -2800,7 +2775,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (MO.isDead()) continue; if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !mri_->use_empty(Reg)) { + !mri_->use_nodbg_empty(Reg)) { isDead = false; break; } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index b62cca3..d6bdb10 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -406,7 +406,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // Add information about the stub reference to ELFMMI so that the stub // gets emitted by the asmprinter. - MCSymbol *SSym = getContext().GetOrCreateTemporarySymbol(Name.str()); + MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = Mang->getSymbol(GV); @@ -759,7 +759,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. - MCSymbol *SSym = getContext().GetOrCreateTemporarySymbol(Name.str()); + MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = Mang->getSymbol(GV); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index c840b39..c288ae0 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -188,8 +188,9 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // Find the instruction that kills SavedReg. 
MachineInstr *KillMI = NULL; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg), - UE = MRI->use_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SavedReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); if (!UseMO.isKill()) continue; @@ -280,8 +281,8 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, MachineInstr *MI, MachineInstr *DefMI, MachineBasicBlock *MBB, unsigned Loc) { bool OtherUse = false; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = UseMO.getParent(); MachineBasicBlock *UseMBB = UseMI->getParent(); @@ -927,6 +928,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi = nmi; continue; } + const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; @@ -1101,7 +1103,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // Some remat'ed instructions are dead. int VReg = ReMatRegs.find_first(); while (VReg != -1) { - if (MRI->use_empty(VReg)) { + if (MRI->use_nodbg_empty(VReg)) { MachineInstr *DefMI = MRI->getVRegDef(VReg); DefMI->eraseFromParent(); } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 44d5311..0b7fde7 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -572,6 +572,9 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I, static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, BitVector &RegKills, std::vector<MachineOperand*> &KillOps) { + // These do not affect kill info at all. 
+ if (MI.isDebugValue()) + return; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.isUndef()) @@ -987,10 +990,17 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, SmallVector<unsigned, 4> Kills; // Take a look at 2 instructions at most. - for (unsigned Count = 0; Count < 2; ++Count) { + unsigned Count = 0; + while (Count < 2) { if (MII == MBB.begin()) break; MachineInstr *PrevMI = prior(MII); + MII = PrevMI; + + if (PrevMI->isDebugValue()) + continue; // Skip over dbg_value instructions. + ++Count; + for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = PrevMI->getOperand(i); if (!MO.isReg() || MO.getReg() == 0) @@ -1019,8 +1029,6 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) Uses.set(*AS); } - - MII = PrevMI; } return 0; @@ -1210,6 +1218,9 @@ OptimizeByUnfold2(unsigned VirtReg, int SS, std::vector<MachineOperand*> &KillOps) { MachineBasicBlock::iterator NextMII = llvm::next(MII); + // Skip over dbg_value instructions. + while (NextMII != MBB->end() && NextMII->isDebugValue()) + NextMII = llvm::next(NextMII); if (NextMII == MBB->end()) return false; @@ -1274,6 +1285,9 @@ OptimizeByUnfold2(unsigned VirtReg, int SS, VRM->RemoveMachineInstrFromMaps(&NextMI); MBB->erase(&NextMI); ++NumModRefUnfold; + // Skip over dbg_value instructions. 
+ while (NextMII != MBB->end() && NextMII->isDebugValue()) + NextMII = llvm::next(NextMII); if (NextMII == MBB->end()) break; } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)); @@ -1619,7 +1633,7 @@ TransferDeadness(unsigned Reg, BitVector &RegKills, for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), RE = MRI->reg_end(); RI != RE; ++RI) { MachineInstr *UDMI = &*RI; - if (UDMI->getParent() != MBB) + if (UDMI->isDebugValue() || UDMI->getParent() != MBB) continue; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); if (DI == DistanceMap.end()) diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index b2e2a04..da21c2d 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -66,10 +66,39 @@ ExecutionEngine::~ExecutionEngine() { delete Modules[i]; } +namespace { +// This class automatically deletes the memory block when the GlobalVariable is +// destroyed. +class GVMemoryBlock : public CallbackVH { + GVMemoryBlock(const GlobalVariable *GV) + : CallbackVH(const_cast<GlobalVariable*>(GV)) {} + +public: + // Returns the address the GlobalVariable should be written into. The + // GVMemoryBlock object prefixes that. + static char *Create(const GlobalVariable *GV, const TargetData& TD) { + const Type *ElTy = GV->getType()->getElementType(); + size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy); + void *RawMemory = ::operator new( + TargetData::RoundUpAlignment(sizeof(GVMemoryBlock), + TD.getPreferredAlignment(GV)) + + GVSize); + new(RawMemory) GVMemoryBlock(GV); + return static_cast<char*>(RawMemory) + sizeof(GVMemoryBlock); + } + + virtual void deleted() { + // We allocated with operator new and with some extra memory hanging off the + // end, so don't just delete this. I'm not sure if this is actually + // required. 
+ this->~GVMemoryBlock(); + ::operator delete(this); + } +}; +} // anonymous namespace + char* ExecutionEngine::getMemoryForGV(const GlobalVariable* GV) { - const Type *ElTy = GV->getType()->getElementType(); - size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy); - return new char[GVSize]; + return GVMemoryBlock::Create(GV, *getTargetData()); } /// removeModule - Remove a Module from the list of modules. @@ -221,35 +250,55 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0; } -// CreateArgv - Turn a vector of strings into a nice argv style array of -// pointers to null terminated strings. -// -static void *CreateArgv(LLVMContext &C, ExecutionEngine *EE, - const std::vector<std::string> &InputArgv) { +namespace { +class ArgvArray { + char *Array; + std::vector<char*> Values; +public: + ArgvArray() : Array(NULL) {} + ~ArgvArray() { clear(); } + void clear() { + delete[] Array; + Array = NULL; + for (size_t I = 0, E = Values.size(); I != E; ++I) { + delete[] Values[I]; + } + Values.clear(); + } + /// Turn a vector of strings into a nice argv style array of pointers to null + /// terminated strings. + void *reset(LLVMContext &C, ExecutionEngine *EE, + const std::vector<std::string> &InputArgv); +}; +} // anonymous namespace +void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, + const std::vector<std::string> &InputArgv) { + clear(); // Free the old contents. 
unsigned PtrSize = EE->getTargetData()->getPointerSize(); - char *Result = new char[(InputArgv.size()+1)*PtrSize]; + Array = new char[(InputArgv.size()+1)*PtrSize]; - DEBUG(dbgs() << "JIT: ARGV = " << (void*)Result << "\n"); + DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n"); const Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; char *Dest = new char[Size]; + Values.push_back(Dest); DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"); std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest); Dest[Size-1] = 0; - // Endian safe: Result[i] = (PointerTy)Dest; - EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Result+i*PtrSize), + // Endian safe: Array[i] = (PointerTy)Dest; + EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Array+i*PtrSize), SBytePtr); } // Null terminate it EE->StoreValueToMemory(PTOGV(0), - (GenericValue*)(Result+InputArgv.size()*PtrSize), + (GenericValue*)(Array+InputArgv.size()*PtrSize), SBytePtr); - return Result; + return Array; } @@ -353,11 +402,13 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, llvm_report_error("Invalid number of arguments of main() supplied"); } + ArgvArray CArgv; + ArgvArray CEnv; if (NumArgs) { GVArgs.push_back(GVArgc); // Arg #0 = argc. if (NumArgs > 1) { // Arg #1 = argv. - GVArgs.push_back(PTOGV(CreateArgv(Fn->getContext(), this, argv))); + GVArgs.push_back(PTOGV(CArgv.reset(Fn->getContext(), this, argv))); assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) && "argv[0] was null after CreateArgv"); if (NumArgs > 2) { @@ -365,7 +416,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, for (unsigned i = 0; envp[i]; ++i) EnvVars.push_back(envp[i]); // Arg #2 = envp. 
- GVArgs.push_back(PTOGV(CreateArgv(Fn->getContext(), this, EnvVars))); + GVArgs.push_back(PTOGV(CEnv.reset(Fn->getContext(), this, EnvVars))); } } } diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 7b061d3..3ba783b 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -265,6 +265,8 @@ GenericValue Interpreter::callExternalFunction(Function *F, if (RF == RawFunctions->end()) { RawFn = (RawFunc)(intptr_t) sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName()); + if (!RawFn) + RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F); if (RawFn != 0) RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later } else { diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 2025463..7a23aec 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -623,24 +623,10 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { AddEncodingComment(Inst); // Show the MCInst if enabled. - if (ShowInst) { - raw_ostream &OS = GetCommentOS(); - OS << "<MCInst #" << Inst.getOpcode(); - - StringRef InstName; - if (InstPrinter) - InstName = InstPrinter->getOpcodeName(Inst.getOpcode()); - if (!InstName.empty()) - OS << ' ' << InstName; - - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) { - OS << "\n "; - Inst.getOperand(i).print(OS, &MAI); - } - OS << ">\n"; - } + if (ShowInst) + Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); - // If we have an AsmPrinter, use that to print, otherwise dump the MCInst. + // If we have an AsmPrinter, use that to print, otherwise print the MCInst. 
if (InstPrinter) InstPrinter->printInst(&Inst); else diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index beecf7e..03b8bd3 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -19,19 +19,26 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" -// FIXME: Gross. -#include "../Target/X86/X86FixupKinds.h" - #include <vector> using namespace llvm; +namespace { +namespace stats { STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); +STATISTIC(EvaluateFixup, "Number of evaluated fixups"); +STATISTIC(FragmentLayouts, "Number of fragment layouts"); +STATISTIC(ObjectBytes, "Number of emitted object file bytes"); +STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps"); +STATISTIC(RelaxedInstructions, "Number of relaxed instructions"); +STATISTIC(SectionLayouts, "Number of section layouts"); +} +} // FIXME FIXME FIXME: There are number of places in this file where we convert // what is a 64-bit assembler value used for computation into a value in the @@ -40,13 +47,103 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); /* *** */ +void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { + // We shouldn't have to do anything special to support negative slides, and it + // is a perfectly valid thing to do as long as other parts of the system are + // can guarantee convergence. + assert(SlideAmount >= 0 && "Negative slides not yet supported"); + + // Update the layout by simply recomputing the layout for the entire + // file. This is trivially correct, but very slow. + // + // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter. + + // Layout the concrete sections and fragments. 
+ MCAssembler &Asm = getAssembler(); + uint64_t Address = 0; + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { + // Skip virtual sections. + if (Asm.getBackend().isVirtualSection(it->getSection())) + continue; + + // Layout the section fragments and its size. + Address = Asm.LayoutSection(*it, *this, Address); + } + + // Layout the virtual sections. + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { + if (!Asm.getBackend().isVirtualSection(it->getSection())) + continue; + + // Layout the section fragments and its size. + Address = Asm.LayoutSection(*it, *this, Address); + } +} + +uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const { + assert(F->getParent() && "Missing section()!"); + return getSectionAddress(F->getParent()) + getFragmentOffset(F); +} + +uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const { + assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!"); + return F->EffectiveSize; +} + +void MCAsmLayout::setFragmentEffectiveSize(MCFragment *F, uint64_t Value) { + F->EffectiveSize = Value; +} + +uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { + assert(F->Offset != ~UINT64_C(0) && "Address not set!"); + return F->Offset; +} + +void MCAsmLayout::setFragmentOffset(MCFragment *F, uint64_t Value) { + F->Offset = Value; +} + +uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const { + assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!"); + return getFragmentAddress(SD->getFragment()) + SD->getOffset(); +} + +uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const { + assert(SD->Address != ~UINT64_C(0) && "Address not set!"); + return SD->Address; +} + +void MCAsmLayout::setSectionAddress(MCSectionData *SD, uint64_t Value) { + SD->Address = Value; +} + +uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { + assert(SD->Size != ~UINT64_C(0) && "File size not set!"); + return 
SD->Size; +} +void MCAsmLayout::setSectionSize(MCSectionData *SD, uint64_t Value) { + SD->Size = Value; +} + +uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { + assert(SD->FileSize != ~UINT64_C(0) && "File size not set!"); + return SD->FileSize; +} +void MCAsmLayout::setSectionFileSize(MCSectionData *SD, uint64_t Value) { + SD->FileSize = Value; +} + + /// @} + +/* *** */ + MCFragment::MCFragment() : Kind(FragmentType(~0)) { } MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) : Kind(_Kind), Parent(_Parent), - FileSize(~UINT64_C(0)) + EffectiveSize(~UINT64_C(0)) { if (Parent) Parent->getFragmentList().push_back(this); @@ -55,11 +152,6 @@ MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) MCFragment::~MCFragment() { } -uint64_t MCFragment::getAddress() const { - assert(getParent() && "Missing Section!"); - return getParent()->getAddress() + Offset; -} - /* *** */ MCSectionData::MCSectionData() : Section(0) {} @@ -95,7 +187,7 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend, MCCodeEmitter &_Emitter, raw_ostream &_OS) : Context(_Context), Backend(_Backend), Emitter(_Emitter), - OS(_OS), SubsectionsViaSymbols(false) + OS(_OS), RelaxAll(false), SubsectionsViaSymbols(false) { } @@ -104,7 +196,6 @@ MCAssembler::~MCAssembler() { static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, const MCAsmFixup &Fixup, - const MCDataFragment *DF, const MCValue Target, const MCSection *BaseSection) { // The effective fixup address is @@ -141,8 +232,8 @@ static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, } static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, + const MCAsmLayout &Layout, const MCAsmFixup &Fixup, - const MCDataFragment *DF, const MCValue Target, const MCSymbolData *BaseSymbol) { // The effective fixup address is @@ -162,7 +253,7 @@ static bool 
isScatteredFixupFullyResolved(const MCAssembler &Asm, if (A->getKind() != MCSymbolRefExpr::VK_None) return false; - A_Base = Asm.getAtom(&Asm.getSymbolData(A->getSymbol())); + A_Base = Asm.getAtom(Layout, &Asm.getSymbolData(A->getSymbol())); if (!A_Base) return false; } @@ -172,7 +263,7 @@ static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, if (B->getKind() != MCSymbolRefExpr::VK_None) return false; - B_Base = Asm.getAtom(&Asm.getSymbolData(B->getSymbol())); + B_Base = Asm.getAtom(Layout, &Asm.getSymbolData(B->getSymbol())); if (!B_Base) return false; } @@ -200,9 +291,13 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const { SD->getFragment()->getParent()->getSection()); } -const MCSymbolData *MCAssembler::getAtomForAddress(const MCSectionData *Section, +// FIXME-PERF: This routine is really slow. +const MCSymbolData *MCAssembler::getAtomForAddress(const MCAsmLayout &Layout, + const MCSectionData *Section, uint64_t Address) const { const MCSymbolData *Best = 0; + uint64_t BestAddress = 0; + for (MCAssembler::const_symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { // Ignore non-linker visible symbols. @@ -215,15 +310,19 @@ const MCSymbolData *MCAssembler::getAtomForAddress(const MCSectionData *Section, // Otherwise, find the closest symbol preceding this address (ties are // resolved in favor of the last defined symbol). - if (it->getAddress() <= Address && - (!Best || it->getAddress() >= Best->getAddress())) + uint64_t SymbolAddress = Layout.getSymbolAddress(it); + if (SymbolAddress <= Address && (!Best || SymbolAddress >= BestAddress)) { Best = it; + BestAddress = SymbolAddress; + } } return Best; } -const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { +// FIXME-PERF: This routine is really slow. +const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout, + const MCSymbolData *SD) const { // Linker visible symbols define atoms. 
if (isSymbolLinkerVisible(SD)) return SD; @@ -233,12 +332,15 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { return 0; // Otherwise, search by address. - return getAtomForAddress(SD->getFragment()->getParent(), SD->getAddress()); + return getAtomForAddress(Layout, SD->getFragment()->getParent(), + Layout.getSymbolAddress(SD)); } -bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup, - MCDataFragment *DF, +bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, + const MCAsmFixup &Fixup, const MCFragment *DF, MCValue &Target, uint64_t &Value) const { + ++stats::EvaluateFixup; + if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout)) llvm_report_error("expected relocatable expression"); @@ -253,13 +355,13 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup, bool IsResolved = true; if (const MCSymbolRefExpr *A = Target.getSymA()) { if (A->getSymbol().isDefined()) - Value += getSymbolData(A->getSymbol()).getAddress(); + Value += Layout.getSymbolAddress(&getSymbolData(A->getSymbol())); else IsResolved = false; } if (const MCSymbolRefExpr *B = Target.getSymB()) { if (B->getSymbol().isDefined()) - Value -= getSymbolData(B->getSymbol()).getAddress(); + Value -= Layout.getSymbolAddress(&getSymbolData(B->getSymbol())); else IsResolved = false; } @@ -273,55 +375,90 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup, const MCSymbolData *BaseSymbol = 0; if (IsPCRel) { BaseSymbol = getAtomForAddress( - DF->getParent(), DF->getAddress() + Fixup.Offset); + Layout, DF->getParent(), Layout.getFragmentAddress(DF)+Fixup.Offset); if (!BaseSymbol) IsResolved = false; } if (IsResolved) - IsResolved = isScatteredFixupFullyResolved(*this, Fixup, DF, Target, + IsResolved = isScatteredFixupFullyResolved(*this, Layout, Fixup, Target, BaseSymbol); } else { const MCSection *BaseSection = 0; if (IsPCRel) BaseSection = &DF->getParent()->getSection(); - IsResolved = 
isScatteredFixupFullyResolvedSimple(*this, Fixup, DF, Target, + IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, Target, BaseSection); } } if (IsPCRel) - Value -= DF->getAddress() + Fixup.Offset; + Value -= Layout.getFragmentAddress(DF) + Fixup.Offset; return IsResolved; } -void MCAssembler::LayoutSection(MCSectionData &SD) { - MCAsmLayout Layout(*this); - uint64_t Address = SD.getAddress(); +uint64_t MCAssembler::LayoutSection(MCSectionData &SD, + MCAsmLayout &Layout, + uint64_t StartAddress) { + bool IsVirtual = getBackend().isVirtualSection(SD.getSection()); + + ++stats::SectionLayouts; + + // Align this section if necessary by adding padding bytes to the previous + // section. It is safe to adjust this out-of-band, because no symbol or + // fragment is allowed to point past the end of the section at any time. + if (uint64_t Pad = OffsetToAlignment(StartAddress, SD.getAlignment())) { + // Unless this section is virtual (where we are allowed to adjust the offset + // freely), the padding goes in the previous section. + if (!IsVirtual) { + // Find the previous non-virtual section. + iterator it = &SD; + assert(it != begin() && "Invalid initial section address!"); + for (--it; getBackend().isVirtualSection(it->getSection()); --it) ; + Layout.setSectionFileSize(&*it, Layout.getSectionFileSize(&*it) + Pad); + } + + StartAddress += Pad; + } + + // Set the aligned section address. + Layout.setSectionAddress(&SD, StartAddress); + uint64_t Address = StartAddress; for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) { MCFragment &F = *it; - F.setOffset(Address - SD.getAddress()); + ++stats::FragmentLayouts; + + uint64_t FragmentOffset = Address - StartAddress; + Layout.setFragmentOffset(&F, FragmentOffset); // Evaluate fragment size. 
+ uint64_t EffectiveSize = 0; switch (F.getKind()) { case MCFragment::FT_Align: { MCAlignFragment &AF = cast<MCAlignFragment>(F); - uint64_t Size = OffsetToAlignment(Address, AF.getAlignment()); - if (Size > AF.getMaxBytesToEmit()) - AF.setFileSize(0); - else - AF.setFileSize(Size); + EffectiveSize = OffsetToAlignment(Address, AF.getAlignment()); + if (EffectiveSize > AF.getMaxBytesToEmit()) + EffectiveSize = 0; break; } case MCFragment::FT_Data: - case MCFragment::FT_Fill: - F.setFileSize(F.getMaxFileSize()); + EffectiveSize = cast<MCDataFragment>(F).getContents().size(); + break; + + case MCFragment::FT_Fill: { + MCFillFragment &FF = cast<MCFillFragment>(F); + EffectiveSize = FF.getValueSize() * FF.getCount(); + break; + } + + case MCFragment::FT_Inst: + EffectiveSize = cast<MCInstFragment>(F).getInstSize(); break; case MCFragment::FT_Org: { @@ -332,12 +469,12 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { llvm_report_error("expected assembly-time absolute expression"); // FIXME: We need a way to communicate this error. - int64_t Offset = TargetLocation - F.getOffset(); + int64_t Offset = TargetLocation - FragmentOffset; if (Offset < 0) llvm_report_error("invalid .org offset '" + Twine(TargetLocation) + - "' (at offset '" + Twine(F.getOffset()) + "'"); + "' (at offset '" + Twine(FragmentOffset) + "'"); - F.setFileSize(Offset); + EffectiveSize = Offset; break; } @@ -346,114 +483,66 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { // Align the fragment offset; it is safe to adjust the offset freely since // this is only in virtual sections. + // + // FIXME: We shouldn't be doing this here. Address = RoundUpToAlignment(Address, ZFF.getAlignment()); - F.setOffset(Address - SD.getAddress()); + Layout.setFragmentOffset(&F, Address - StartAddress); - // FIXME: This is misnamed. 
- F.setFileSize(ZFF.getSize()); + EffectiveSize = ZFF.getSize(); break; } } - Address += F.getFileSize(); + Layout.setFragmentEffectiveSize(&F, EffectiveSize); + Address += EffectiveSize; } // Set the section sizes. - SD.setSize(Address - SD.getAddress()); - if (getBackend().isVirtualSection(SD.getSection())) - SD.setFileSize(0); + Layout.setSectionSize(&SD, Address - StartAddress); + if (IsVirtual) + Layout.setSectionFileSize(&SD, 0); else - SD.setFileSize(Address - SD.getAddress()); -} - -/// WriteNopData - Write optimal nops to the output file for the \arg Count -/// bytes. This returns the number of bytes written. It may return 0 if -/// the \arg Count is more than the maximum optimal nops. -/// -/// FIXME this is X86 32-bit specific and should move to a better place. -static uint64_t WriteNopData(uint64_t Count, MCObjectWriter *OW) { - static const uint8_t Nops[16][16] = { - // nop - {0x90}, - // xchg %ax,%ax - {0x66, 0x90}, - // nopl (%[re]ax) - {0x0f, 0x1f, 0x00}, - // nopl 0(%[re]ax) - {0x0f, 0x1f, 0x40, 0x00}, - // nopl 0(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopw 0(%[re]ax,%[re]ax,1) - {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopl 0L(%[re]ax) - {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, - // nopl 0L(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, - // nopw 0L(%[re]ax,%[re]ax,1) - {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, - // nopw %cs:0L(%[re]ax,%[re]ax,1) - {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, - // nopl 0(%[re]ax,%[re]ax,1) - // nopw 0(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopw 0(%[re]ax,%[re]ax,1) - // nopw 0(%[re]ax,%[re]ax,1) - {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopw 0(%[re]ax,%[re]ax,1) - // nopl 0L(%[re]ax) */ - {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, - // nopl 0L(%[re]ax) - // nopl 0L(%[re]ax) - {0x0f, 0x1f, 0x80, 0x00, 
0x00, 0x00, 0x00, - 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, - // nopl 0L(%[re]ax) - // nopl 0L(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00} - }; - - if (Count > 15) - return 0; - - for (uint64_t i = 0; i < Count; i++) - OW->Write8(uint8_t(Nops[Count - 1][i])); + Layout.setSectionFileSize(&SD, Address - StartAddress); - return Count; + return Address; } /// WriteFragmentData - Write the \arg F data to the output file. -static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) { +static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment &F, MCObjectWriter *OW) { uint64_t Start = OW->getStream().tell(); (void) Start; - ++EmittedFragments; + ++stats::EmittedFragments; // FIXME: Embed in fragments instead? + uint64_t FragmentSize = Layout.getFragmentEffectiveSize(&F); switch (F.getKind()) { case MCFragment::FT_Align: { MCAlignFragment &AF = cast<MCAlignFragment>(F); - uint64_t Count = AF.getFileSize() / AF.getValueSize(); + uint64_t Count = FragmentSize / AF.getValueSize(); // FIXME: This error shouldn't actually occur (the front end should emit // multiple .align directives to enforce the semantics it wants), but is // severe enough that we want to report it. How to handle this? - if (Count * AF.getValueSize() != AF.getFileSize()) + if (Count * AF.getValueSize() != FragmentSize) llvm_report_error("undefined .align directive, value size '" + Twine(AF.getValueSize()) + "' is not a divisor of padding size '" + - Twine(AF.getFileSize()) + "'"); + Twine(FragmentSize) + "'"); // See if we are aligning with nops, and if so do that first to try to fill // the Count bytes. Then if that did not fill any bytes or there are any // bytes left to fill use the the Value and ValueSize to fill the rest. + // If we are aligning with nops, ask that target to emit the right data. 
if (AF.getEmitNops()) { - uint64_t NopByteCount = WriteNopData(Count, OW); - Count -= NopByteCount; + if (!Asm.getBackend().WriteNopData(Count, OW)) + llvm_report_error("unable to write nop sequence of " + + Twine(Count) + " bytes"); + break; } + // Otherwise, write out in multiples of the value size. for (uint64_t i = 0; i != Count; ++i) { switch (AF.getValueSize()) { default: @@ -468,7 +557,9 @@ static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) { } case MCFragment::FT_Data: { - OW->WriteBytes(cast<MCDataFragment>(F).getContents().str()); + MCDataFragment &DF = cast<MCDataFragment>(F); + assert(FragmentSize == DF.getContents().size() && "Invalid size!"); + OW->WriteBytes(DF.getContents().str()); break; } @@ -487,10 +578,14 @@ static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) { break; } + case MCFragment::FT_Inst: + llvm_unreachable("unexpected inst fragment after lowering"); + break; + case MCFragment::FT_Org: { MCOrgFragment &OF = cast<MCOrgFragment>(F); - for (uint64_t i = 0, e = OF.getFileSize(); i != e; ++i) + for (uint64_t i = 0, e = FragmentSize; i != e; ++i) OW->Write8(uint8_t(OF.getValue())); break; @@ -502,14 +597,18 @@ static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) { } } - assert(OW->getStream().tell() - Start == F.getFileSize()); + assert(OW->getStream().tell() - Start == FragmentSize); } void MCAssembler::WriteSectionData(const MCSectionData *SD, + const MCAsmLayout &Layout, MCObjectWriter *OW) const { + uint64_t SectionSize = Layout.getSectionSize(SD); + uint64_t SectionFileSize = Layout.getSectionFileSize(SD); + // Ignore virtual sections. 
if (getBackend().isVirtualSection(SD->getSection())) { - assert(SD->getFileSize() == 0); + assert(SectionFileSize == 0 && "Invalid size for section!"); return; } @@ -518,13 +617,13 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD, for (MCSectionData::const_iterator it = SD->begin(), ie = SD->end(); it != ie; ++it) - WriteFragmentData(*it, OW); + WriteFragmentData(*this, Layout, *it, OW); // Add section padding. - assert(SD->getFileSize() >= SD->getSize() && "Invalid section sizes!"); - OW->WriteZeros(SD->getFileSize() - SD->getSize()); + assert(SectionFileSize >= SectionSize && "Invalid section sizes!"); + OW->WriteZeros(SectionFileSize - SectionSize); - assert(OW->getStream().tell() - Start == SD->getFileSize()); + assert(OW->getStream().tell() - Start == SectionFileSize); } void MCAssembler::Finish() { @@ -532,15 +631,35 @@ void MCAssembler::Finish() { llvm::errs() << "assembler backend - pre-layout\n--\n"; dump(); }); + // Assign section and fragment ordinals, all subsequent backend code is + // responsible for updating these in place. + unsigned SectionIndex = 0; + unsigned FragmentIndex = 0; + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + it->setOrdinal(SectionIndex++); + + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) + it2->setOrdinal(FragmentIndex++); + } + // Layout until everything fits. - while (LayoutOnce()) + MCAsmLayout Layout(*this); + while (LayoutOnce(Layout)) continue; DEBUG_WITH_TYPE("mc-dump", { - llvm::errs() << "assembler backend - post-layout\n--\n"; + llvm::errs() << "assembler backend - post-relaxation\n--\n"; + dump(); }); + + // Finalize the layout, including fragment lowering. + FinishLayout(Layout); + + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - final-layout\n--\n"; dump(); }); - // FIXME: Factor out MCObjectWriter. 
+ uint64_t StartOffset = OS.tell(); llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS)); if (!Writer) llvm_report_error("unable to create object writer!"); @@ -550,9 +669,6 @@ void MCAssembler::Finish() { Writer->ExecutePostLayoutBinding(*this); // Evaluate and apply the fixups, generating relocation entries as necessary. - // - // FIXME: Share layout object. - MCAsmLayout Layout(*this); for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { for (MCSectionData::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2; ++it2) { @@ -571,7 +687,7 @@ void MCAssembler::Finish() { // The fixup was unresolved, we need a relocation. Inform the object // writer of the relocation, and give it an opportunity to adjust the // fixup value if need be. - Writer->RecordRelocation(*this, *DF, Fixup, Target, FixedValue); + Writer->RecordRelocation(*this, Layout, DF, Fixup, Target,FixedValue); } getBackend().ApplyFixup(Fixup, *DF, FixedValue); @@ -580,17 +696,17 @@ void MCAssembler::Finish() { } // Write the object file. - Writer->WriteObject(*this); + Writer->WriteObject(*this, Layout); OS.flush(); -} -bool MCAssembler::FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF) { - // FIXME: Share layout object. - MCAsmLayout Layout(*this); + stats::ObjectBytes += OS.tell() - StartOffset; +} - // Currently we only need to relax X86::reloc_pcrel_1byte. - if (unsigned(Fixup.Kind) != X86::reloc_pcrel_1byte) - return false; +bool MCAssembler::FixupNeedsRelaxation(const MCAsmFixup &Fixup, + const MCFragment *DF, + const MCAsmLayout &Layout) const { + if (getRelaxAll()) + return true; // If we cannot resolve the fixup value, it requires relaxation. 
MCValue Target; @@ -602,135 +718,141 @@ bool MCAssembler::FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF) { return int64_t(Value) != int64_t(int8_t(Value)); } -bool MCAssembler::LayoutOnce() { +bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF, + const MCAsmLayout &Layout) const { + // If this inst doesn't ever need relaxation, ignore it. This occurs when we + // are intentionally pushing out inst fragments, or because we relaxed a + // previous instruction to one that doesn't need relaxation. + if (!getBackend().MayNeedRelaxation(IF->getInst(), IF->getFixups())) + return false; + + for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(), + ie = IF->fixup_end(); it != ie; ++it) + if (FixupNeedsRelaxation(*it, IF, Layout)) + return true; + + return false; +} + +bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) { + ++stats::RelaxationSteps; + // Layout the concrete sections and fragments. uint64_t Address = 0; - MCSectionData *Prev = 0; for (iterator it = begin(), ie = end(); it != ie; ++it) { - MCSectionData &SD = *it; - // Skip virtual sections. - if (getBackend().isVirtualSection(SD.getSection())) + if (getBackend().isVirtualSection(it->getSection())) continue; - // Align this section if necessary by adding padding bytes to the previous - // section. - if (uint64_t Pad = OffsetToAlignment(Address, it->getAlignment())) { - assert(Prev && "Missing prev section!"); - Prev->setFileSize(Prev->getFileSize() + Pad); - Address += Pad; - } - // Layout the section fragments and its size. - SD.setAddress(Address); - LayoutSection(SD); - Address += SD.getFileSize(); - - Prev = &SD; + Address = LayoutSection(*it, Layout, Address); } // Layout the virtual sections. 
for (iterator it = begin(), ie = end(); it != ie; ++it) { - MCSectionData &SD = *it; - - if (!getBackend().isVirtualSection(SD.getSection())) + if (!getBackend().isVirtualSection(it->getSection())) continue; - // Align this section if necessary by adding padding bytes to the previous - // section. - if (uint64_t Pad = OffsetToAlignment(Address, it->getAlignment())) - Address += Pad; - - SD.setAddress(Address); - LayoutSection(SD); - Address += SD.getSize(); + // Layout the section fragments and its size. + Address = LayoutSection(*it, Layout, Address); } - // Scan the fixups in order and relax any that don't fit. + // Scan for fragments that need relaxation. + bool WasRelaxed = false; for (iterator it = begin(), ie = end(); it != ie; ++it) { MCSectionData &SD = *it; for (MCSectionData::iterator it2 = SD.begin(), ie2 = SD.end(); it2 != ie2; ++it2) { - MCDataFragment *DF = dyn_cast<MCDataFragment>(it2); - if (!DF) + // Check if this is an instruction fragment that needs relaxation. + MCInstFragment *IF = dyn_cast<MCInstFragment>(it2); + if (!IF || !FragmentNeedsRelaxation(IF, Layout)) continue; - for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(), - ie3 = DF->fixup_end(); it3 != ie3; ++it3) { - MCAsmFixup &Fixup = *it3; - - // Check whether we need to relax this fixup. - if (!FixupNeedsRelaxation(Fixup, DF)) - continue; - - // Relax the instruction. - // - // FIXME: This is a huge temporary hack which just looks for x86 - // branches; the only thing we need to relax on x86 is - // 'X86::reloc_pcrel_1byte'. Once we have MCInst fragments, this will be - // replaced by a TargetAsmBackend hook (most likely tblgen'd) to relax - // an individual MCInst. 
- SmallVectorImpl<char> &C = DF->getContents(); - uint64_t PrevOffset = Fixup.Offset; - unsigned Amt = 0; - - // jcc instructions - if (unsigned(C[Fixup.Offset-1]) >= 0x70 && - unsigned(C[Fixup.Offset-1]) <= 0x7f) { - C[Fixup.Offset] = C[Fixup.Offset-1] + 0x10; - C[Fixup.Offset-1] = char(0x0f); - ++Fixup.Offset; - Amt = 4; - - // jmp rel8 - } else if (C[Fixup.Offset-1] == char(0xeb)) { - C[Fixup.Offset-1] = char(0xe9); - Amt = 3; - - } else - llvm_unreachable("unknown 1 byte pcrel instruction!"); - - Fixup.Value = MCBinaryExpr::Create( - MCBinaryExpr::Sub, Fixup.Value, - MCConstantExpr::Create(3, getContext()), - getContext()); - C.insert(C.begin() + Fixup.Offset, Amt, char(0)); - Fixup.Kind = MCFixupKind(X86::reloc_pcrel_4byte); - - // Update the remaining fixups, which have slid. - // - // FIXME: This is bad for performance, but will be eliminated by the - // move to MCInst specific fragments. - ++it3; - for (; it3 != ie3; ++it3) - it3->Offset += Amt; - - // Update all the symbols for this fragment, which may have slid. - // - // FIXME: This is really really bad for performance, but will be - // eliminated by the move to MCInst specific fragments. - for (MCAssembler::symbol_iterator it = symbol_begin(), - ie = symbol_end(); it != ie; ++it) { - MCSymbolData &SD = *it; - - if (it->getFragment() != DF) - continue; - - if (SD.getOffset() > PrevOffset) - SD.setOffset(SD.getOffset() + Amt); - } - - // Restart layout. - // - // FIXME: This is O(N^2), but will be eliminated once we have a smart - // MCAsmLayout object. - return true; + ++stats::RelaxedInstructions; + + // FIXME-PERF: We could immediately lower out instructions if we can tell + // they are fully resolved, to avoid retesting on later passes. + + // Relax the fragment. + + MCInst Relaxed; + getBackend().RelaxInstruction(IF, Relaxed); + + // Encode the new instruction. + // + // FIXME-PERF: If it matters, we could let the target do this. It can + // probably do so more efficiently in many cases. 
+ SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups); + VecOS.flush(); + + // Update the instruction fragment. + int SlideAmount = Code.size() - IF->getInstSize(); + IF->setInst(Relaxed); + IF->getCode() = Code; + IF->getFixups().clear(); + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + IF->getFixups().push_back(MCAsmFixup(F.getOffset(), *F.getValue(), + F.getKind())); } + + // Update the layout, and remember that we relaxed. If we are relaxing + // everything, we can skip this step since nothing will depend on updating + // the values. + if (!getRelaxAll()) + Layout.UpdateForSlide(IF, SlideAmount); + WasRelaxed = true; } } - return false; + return WasRelaxed; +} + +void MCAssembler::FinishLayout(MCAsmLayout &Layout) { + // Lower out any instruction fragments, to simplify the fixup application and + // output. + // + // FIXME-PERF: We don't have to do this, but the assumption is that it is + // cheap (we will mostly end up eliminating fragments and appending on to data + // fragments), so the extra complexity downstream isn't worth it. Evaluate + // this assumption. + for (iterator it = begin(), ie = end(); it != ie; ++it) { + MCSectionData &SD = *it; + + for (MCSectionData::iterator it2 = SD.begin(), + ie2 = SD.end(); it2 != ie2; ++it2) { + MCInstFragment *IF = dyn_cast<MCInstFragment>(it2); + if (!IF) + continue; + + // Create a new data fragment for the instruction. + // + // FIXME-PERF: Reuse previous data fragment if possible. + MCDataFragment *DF = new MCDataFragment(); + SD.getFragmentList().insert(it2, DF); + + // Update the data fragments layout data. + // + // FIXME: Add MCAsmLayout utility for this. 
+ DF->setParent(IF->getParent()); + DF->setOrdinal(IF->getOrdinal()); + Layout.setFragmentOffset(DF, Layout.getFragmentOffset(IF)); + Layout.setFragmentEffectiveSize(DF, Layout.getFragmentEffectiveSize(IF)); + + // Copy in the data and the fixups. + DF->getContents().append(IF->getCode().begin(), IF->getCode().end()); + for (unsigned i = 0, e = IF->getFixups().size(); i != e; ++i) + DF->getFixups().push_back(IF->getFixups()[i]); + + // Delete the instruction fragment and update the iterator. + SD.getFragmentList().erase(IF); + it2 = DF; + } + } } // Debugging methods @@ -749,7 +871,7 @@ void MCFragment::dump() { raw_ostream &OS = llvm::errs(); OS << "<MCFragment " << (void*) this << " Offset:" << Offset - << " FileSize:" << FileSize; + << " EffectiveSize:" << EffectiveSize; OS << ">"; } @@ -801,6 +923,17 @@ void MCFillFragment::dump() { << " Count:" << getCount() << ">"; } +void MCInstFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCInstFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Inst:"; + getInst().dump_pretty(OS); + OS << ">"; +} + void MCOrgFragment::dump() { raw_ostream &OS = llvm::errs(); diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 37e8282..e02cbc7 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -23,9 +23,12 @@ MCContext::~MCContext() { // we don't need to free them here. } -MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name, bool isTemporary) { +MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { assert(!Name.empty() && "Normal symbols cannot be unnamed!"); + // Determine whether this is an assembler temporary or normal label. + bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix()); + // Do the lookup and get the entire StringMapEntry. We want access to the // key if we are creating the entry. 
StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name); @@ -38,24 +41,17 @@ MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name, bool isTemporary) { return Result; } -MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name, bool isTemporary) { +MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { SmallString<128> NameSV; Name.toVector(NameSV); - return GetOrCreateSymbol(NameSV.str(), isTemporary); + return GetOrCreateSymbol(NameSV.str()); } MCSymbol *MCContext::CreateTempSymbol() { - return GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix()) + - "tmp" + Twine(NextUniqueID++)); -} - -MCSymbol *MCContext::GetOrCreateTemporarySymbol(const Twine &Name) { - SmallString<128> NameSV; - Name.toVector(NameSV); - return GetOrCreateTemporarySymbol(NameSV.str()); + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + "tmp" + Twine(NextUniqueID++)); } - MCSymbol *MCContext::LookupSymbol(StringRef Name) const { return Symbols.lookup(Name); } diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 2759944..bc670ab 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mcexpr" #include "llvm/MC/MCExpr.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -19,6 +21,12 @@ #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; +namespace { +namespace stats { +STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); +} +} + void MCExpr::print(raw_ostream &OS) const { switch (getKind()) { case MCExpr::Target: @@ -146,12 +154,6 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind, return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx); } -const MCSymbolRefExpr *MCSymbolRefExpr::CreateTemp(StringRef Name, - VariantKind Kind, - MCContext &Ctx) { - return Create(Ctx.GetOrCreateTemporarySymbol(Name), 
Kind, Ctx); -} - StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { switch (Kind) { default: @@ -194,6 +196,12 @@ void MCTargetExpr::Anchor() {} bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const { MCValue Value; + // Fast path constants. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) { + Res = CE->getValue(); + return true; + } + if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute()) return false; @@ -225,6 +233,8 @@ static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A, bool MCExpr::EvaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout) const { + ++stats::MCExprEvaluate; + switch (getKind()) { case Target: return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout); @@ -252,8 +262,8 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol()); MCSymbolData &B = Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol()); - Res = MCValue::get(+ A.getFragment()->getAddress() + A.getOffset() - - B.getFragment()->getAddress() - B.getOffset() + Res = MCValue::get(+ Layout->getSymbolAddress(&A) + - Layout->getSymbolAddress(&B) + Res.getConstant()); } diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index 0634c9f..de142dc 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstPrinter.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -43,6 +44,22 @@ void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { OS << ">"; } +void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI, + const MCInstPrinter *Printer, + StringRef Separator) const { + OS << "<MCInst #" << getOpcode(); + + // Show the instruction opcode name if we have access to a printer. 
+ if (Printer) + OS << ' ' << Printer->getOpcodeName(getOpcode()); + + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + OS << Separator; + getOperand(i).print(OS, MAI); + } + OS << ">\n"; +} + void MCInst::dump() const { print(dbgs(), 0); dbgs() << "\n"; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 9504392..120f837 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -18,6 +18,8 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" + using namespace llvm; namespace { @@ -57,6 +59,15 @@ private: return 0; } + /// Get a data fragment to write into, creating a new one if the current + /// fragment is not a data fragment. + MCDataFragment *getOrCreateDataFragment() const { + MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!F) + F = new MCDataFragment(CurSectionData); + return F; + } + public: MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter) @@ -64,6 +75,8 @@ public: CurSectionData(0) {} ~MCMachOStreamer() {} + MCAssembler &getAssembler() { return Assembler; } + const MCExpr *AddValueSymbols(const MCExpr *Value) { switch (Value->getKind()) { case MCExpr::Target: assert(0 && "Can't handle target exprs yet!"); @@ -150,11 +163,11 @@ void MCMachOStreamer::SwitchSection(const MCSection *Section) { void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - // FIXME: We should also use offsets into Fill fragments. - MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); - if (!F) - F = new MCDataFragment(CurSectionData); - + // FIXME: This is wasteful, we don't necessarily need to create a data + // fragment. 
Instead, we should mark the symbol as pointing into the data + // fragment if it exists, otherwise we should just queue the label and set its + // fragment pointer when we emit the next fragment. + MCDataFragment *F = getOrCreateDataFragment(); MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); SD.setFragment(F); @@ -307,17 +320,12 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, } void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { - MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); - if (!DF) - DF = new MCDataFragment(CurSectionData); - DF->getContents().append(Data.begin(), Data.end()); + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); } void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace) { - MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); - if (!DF) - DF = new MCDataFragment(CurSectionData); + MCDataFragment *DF = getOrCreateDataFragment(); // Avoid fixups when possible. int64_t AbsValue; @@ -349,8 +357,7 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - // FIXME the 0x90 is the default x86 1 byte nop opcode. - new MCAlignFragment(ByteAlignment, 0x90, 1, MaxBytesToEmit, + new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, true /* EmitNops */, CurSectionData); // Update the maximum alignment on the current section if necessary. @@ -371,20 +378,54 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { CurSectionData->setHasInstructions(true); + // FIXME-PERF: Common case is that we don't need to relax, encode directly + // onto the data fragments buffers. 
+ SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); VecOS.flush(); - // Add the fixups and data. - MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); - if (!DF) - DF = new MCDataFragment(CurSectionData); + // FIXME: Eliminate this copy. + SmallVector<MCAsmFixup, 4> AsmFixups; for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { MCFixup &F = Fixups[i]; - DF->addFixup(MCAsmFixup(DF->getContents().size()+F.getOffset(), - *F.getValue(), F.getKind())); + AsmFixups.push_back(MCAsmFixup(F.getOffset(), *F.getValue(), + F.getKind())); + } + + // See if we might need to relax this instruction, if so it needs its own + // fragment. + // + // FIXME-PERF: Support target hook to do a fast path that avoids the encoder, + // when we can immediately tell that we will get something which might need + // relaxation (and compute its size). + // + // FIXME-PERF: We should also be smart about immediately relaxing instructions + // which we can already show will never possibly fit (we can also do a very + // good job of this before we do the first relaxation pass, because we have + // total knowledge about undefined symbols at that point). Even now, though, + // we can do a decent job, especially on Darwin where scattering means that we + // are going to often know that we can never fully resolve a fixup. + if (Assembler.getBackend().MayNeedRelaxation(Inst, AsmFixups)) { + MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData); + + // Add the fixups and data. + // + // FIXME: Revisit this design decision when relaxation is done, we may be + // able to get away with not storing any extra data in the MCInst. + IF->getCode() = Code; + IF->getFixups() = AsmFixups; + + return; + } + + // Add the fixups and data. 
+ MCDataFragment *DF = getOrCreateDataFragment(); + for (unsigned i = 0, e = AsmFixups.size(); i != e; ++i) { + AsmFixups[i].Offset += DF->getContents().size(); + DF->addFixup(AsmFixups[i]); } DF->getContents().append(Code.begin(), Code.end()); } @@ -394,6 +435,10 @@ void MCMachOStreamer::Finish() { } MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *CE) { - return new MCMachOStreamer(Context, TAB, OS, CE); + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll) { + MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 4ec5247..24616b4 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -238,9 +238,7 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { } MCSymbol *AsmParser::CreateSymbol(StringRef Name) { - // If the label starts with L it is an assembler temporary label. - if (Name.startswith("L")) - return Ctx.GetOrCreateTemporarySymbol(Name); + // FIXME: Inline into callers. 
return Ctx.GetOrCreateSymbol(Name); } diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index 24c89ef..f6e9636 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -26,7 +26,11 @@ MCSection::~MCSection() { MCSectionCOFF *MCSectionCOFF:: Create(StringRef Name, bool IsDirective, SectionKind K, MCContext &Ctx) { - return new (Ctx) MCSectionCOFF(Name, IsDirective, K); + char *NameCopy = static_cast<char*>( + Ctx.Allocate(Name.size(), /*Alignment=*/1)); + memcpy(NameCopy, Name.data(), Name.size()); + return new (Ctx) MCSectionCOFF(StringRef(NameCopy, Name.size()), + IsDirective, K); } void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 4b08c22..e073eb5 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" @@ -271,12 +272,14 @@ public: assert(OS.tell() - Start == SegmentLoadCommandSize); } - void WriteSection(const MCAssembler &Asm, const MCSectionData &SD, - uint64_t FileOffset, uint64_t RelocationsStart, - unsigned NumRelocations) { + void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCSectionData &SD, uint64_t FileOffset, + uint64_t RelocationsStart, unsigned NumRelocations) { + uint64_t SectionSize = Layout.getSectionSize(&SD); + // The offset is unused for virtual sections. 
if (Asm.getBackend().isVirtualSection(SD.getSection())) { - assert(SD.getFileSize() == 0 && "Invalid file size!"); + assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!"); FileOffset = 0; } @@ -292,11 +295,11 @@ public: WriteBytes(Section.getSectionName(), 16); WriteBytes(Section.getSegmentName(), 16); if (Is64Bit) { - Write64(SD.getAddress()); // address - Write64(SD.getSize()); // size + Write64(Layout.getSectionAddress(&SD)); // address + Write64(SectionSize); // size } else { - Write32(SD.getAddress()); // address - Write32(SD.getSize()); // size + Write32(Layout.getSectionAddress(&SD)); // address + Write32(SectionSize); // size } Write32(FileOffset); @@ -372,7 +375,7 @@ public: assert(OS.tell() - Start == DysymtabLoadCommandSize); } - void WriteNlist(MachSymbolData &MSD) { + void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { MCSymbolData &Data = *MSD.SymbolData; const MCSymbol &Symbol = Data.getSymbol(); uint8_t Type = 0; @@ -403,7 +406,7 @@ public: if (Symbol.isAbsolute()) { llvm_unreachable("FIXME: Not yet implemented!"); } else { - Address = Data.getAddress(); + Address = Layout.getSymbolAddress(&Data); } } else if (Data.isCommon()) { // Common symbols are encoded with the size in the address @@ -437,8 +440,22 @@ public: Write32(Address); } - void RecordX86_64Relocation(const MCAssembler &Asm, - const MCDataFragment &Fragment, + // FIXME: We really need to improve the relocation validation. Basically, we + // want to implement a separate computation which evaluates the relocation + // entry as the linker would, and verifies that the resultant fixup value is + // exactly what the encoder wanted. This will catch several classes of + // problems: + // + // - Relocation entry bugs, the two algorithms are unlikely to have the same + // exact bug. + // + // - Relaxation issues, where we forget to relax something. + // + // - Input errors, where something cannot be correctly encoded. 'as' allows + // these through in many cases. 
+ + void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCAsmFixup &Fixup, MCValue Target, uint64_t &FixedValue) { unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); @@ -446,7 +463,7 @@ public: unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); // See <reloc.h>. - uint32_t Address = Fragment.getOffset() + Fixup.Offset; + uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; int64_t Value = 0; unsigned Index = 0; unsigned IsExtern = 0; @@ -480,11 +497,11 @@ public: } else if (Target.getSymB()) { // A - B + constant const MCSymbol *A = &Target.getSymA()->getSymbol(); MCSymbolData &A_SD = Asm.getSymbolData(*A); - const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + const MCSymbolData *A_Base = Asm.getAtom(Layout, &A_SD); const MCSymbol *B = &Target.getSymB()->getSymbol(); MCSymbolData &B_SD = Asm.getSymbolData(*B); - const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + const MCSymbolData *B_Base = Asm.getAtom(Layout, &B_SD); // Neither symbol can be modified. 
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || @@ -507,8 +524,8 @@ public: if (A_Base == B_Base) llvm_report_error("unsupported relocation with identical base"); - Value += A_SD.getAddress() - A_Base->getAddress(); - Value -= B_SD.getAddress() - B_Base->getAddress(); + Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base); + Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base); Index = A_Base->getIndex(); IsExtern = 1; @@ -521,7 +538,7 @@ public: (Log2Size << 25) | (IsExtern << 27) | (Type << 28)); - Relocations[Fragment.getParent()].push_back(MRE); + Relocations[Fragment->getParent()].push_back(MRE); Index = B_Base->getIndex(); IsExtern = 1; @@ -529,7 +546,7 @@ public: } else { const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); MCSymbolData &SD = Asm.getSymbolData(*Symbol); - const MCSymbolData *Base = Asm.getAtom(&SD); + const MCSymbolData *Base = Asm.getAtom(Layout, &SD); // x86_64 almost always uses external relocations, except when there is no // symbol to use as a base address (a local symbol with no preceeding @@ -540,19 +557,12 @@ public: // Add the local offset, if needed. if (Base != &SD) - Value += SD.getAddress() - Base->getAddress(); + Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base); } else { - // The index is the section ordinal. - // - // FIXME: O(N) - Index = 1; - MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); - for (; it != ie; ++it, ++Index) - if (&*it == SD.getFragment()->getParent()) - break; - assert(it != ie && "Unable to find section index!"); + // The index is the section ordinal (1-based). 
+ Index = SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; - Value += SD.getAddress(); + Value += Layout.getSymbolAddress(&SD); if (IsPCRel) Value -= Address + (1 << Log2Size); @@ -602,9 +612,16 @@ public: } } } else { - if (Modifier == MCSymbolRefExpr::VK_GOT) + if (Modifier == MCSymbolRefExpr::VK_GOT) { Type = RIT_X86_64_GOT; - else if (Modifier != MCSymbolRefExpr::VK_None) + } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // GOTPCREL is allowed as a modifier on non-PCrel instructions, in + // which case all we do is set the PCrel bit in the relocation entry; + // this is used with exception handling, for example. The source is + // required to include any necessary offset directly. + Type = RIT_X86_64_GOT; + IsPCRel = 1; + } else if (Modifier != MCSymbolRefExpr::VK_None) llvm_report_error("unsupported symbol modifier in relocation"); else Type = RIT_X86_64_Unsigned; @@ -622,14 +639,15 @@ public: (Log2Size << 25) | (IsExtern << 27) | (Type << 28)); - Relocations[Fragment.getParent()].push_back(MRE); + Relocations[Fragment->getParent()].push_back(MRE); } void RecordScatteredRelocation(const MCAssembler &Asm, - const MCFragment &Fragment, + const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCAsmFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - uint32_t Address = Fragment.getOffset() + Fixup.Offset; + uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); unsigned Type = RIT_Vanilla; @@ -642,7 +660,7 @@ public: llvm_report_error("symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); - uint32_t Value = A_SD->getAddress(); + uint32_t Value = Layout.getSymbolAddress(A_SD); uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { @@ -658,7 +676,7 @@ public: // relocation types from the linkers point of view, this is done solely // for pedantic compatibility with 'as'. 
Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference; - Value2 = B_SD->getAddress(); + Value2 = Layout.getSymbolAddress(B_SD); } // Relocations are written out in reverse order, so the PAIR comes first. @@ -670,7 +688,7 @@ public: (IsPCRel << 30) | RF_Scattered); MRE.Word1 = Value2; - Relocations[Fragment.getParent()].push_back(MRE); + Relocations[Fragment->getParent()].push_back(MRE); } MachRelocationEntry MRE; @@ -680,14 +698,14 @@ public: (IsPCRel << 30) | RF_Scattered); MRE.Word1 = Value; - Relocations[Fragment.getParent()].push_back(MRE); + Relocations[Fragment->getParent()].push_back(MRE); } - void RecordRelocation(const MCAssembler &Asm, const MCDataFragment &Fragment, - const MCAsmFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCAsmFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { if (Is64Bit) { - RecordX86_64Relocation(Asm, Fragment, Fixup, Target, FixedValue); + RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); return; } @@ -702,12 +720,12 @@ public: if (Target.getSymB() || (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() && Offset)) { - RecordScatteredRelocation(Asm, Fragment, Fixup, Target, FixedValue); + RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,Target,FixedValue); return; } // See <reloc.h>. - uint32_t Address = Fragment.getOffset() + Fixup.Offset; + uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; uint32_t Value = 0; unsigned Index = 0; unsigned IsExtern = 0; @@ -729,16 +747,9 @@ public: Index = SD->getIndex(); Value = 0; } else { - // The index is the section ordinal. 
- // - // FIXME: O(N) - Index = 1; - MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); - for (; it != ie; ++it, ++Index) - if (&*it == SD->getFragment()->getParent()) - break; - assert(it != ie && "Unable to find section index!"); - Value = SD->getAddress(); + // The index is the section ordinal (1-based). + Index = SD->getFragment()->getParent()->getOrdinal() + 1; + Value = Layout.getSymbolAddress(SD); } Type = RIT_Vanilla; @@ -752,7 +763,7 @@ public: (Log2Size << 25) | (IsExtern << 27) | (Type << 28)); - Relocations[Fragment.getParent()].push_back(MRE); + Relocations[Fragment->getParent()].push_back(MRE); } void BindIndirectSymbols(MCAssembler &Asm) { @@ -920,7 +931,7 @@ public: UndefinedSymbolData); } - void WriteObject(const MCAssembler &Asm) { + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) { unsigned NumSections = Asm.size(); // The section data starts after the header, the segment load command (and @@ -948,16 +959,17 @@ public: for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { const MCSectionData &SD = *it; + uint64_t Address = Layout.getSectionAddress(&SD); + uint64_t Size = Layout.getSectionSize(&SD); + uint64_t FileSize = Layout.getSectionFileSize(&SD); - VMSize = std::max(VMSize, SD.getAddress() + SD.getSize()); + VMSize = std::max(VMSize, Address + Size); if (Asm.getBackend().isVirtualSection(SD.getSection())) continue; - SectionDataSize = std::max(SectionDataSize, - SD.getAddress() + SD.getSize()); - SectionDataFileSize = std::max(SectionDataFileSize, - SD.getAddress() + SD.getFileSize()); + SectionDataSize = std::max(SectionDataSize, Address + Size); + SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); } // The section data is padded to 4 bytes. 
@@ -978,8 +990,8 @@ public: ie = Asm.end(); it != ie; ++it) { std::vector<MachRelocationEntry> &Relocs = Relocations[it]; unsigned NumRelocs = Relocs.size(); - uint64_t SectionStart = SectionDataStart + it->getAddress(); - WriteSection(Asm, *it, SectionStart, RelocTableEnd, NumRelocs); + uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it); + WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs); RelocTableEnd += NumRelocs * RelocationInfoSize; } @@ -1020,7 +1032,7 @@ public: // Write the actual section data. for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) - Asm.WriteSectionData(it, Writer); + Asm.WriteSectionData(it, Layout, Writer); // Write the extra padding. WriteZeros(SectionDataPadding); @@ -1066,11 +1078,11 @@ public: // Write the symbol table entries. for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - WriteNlist(LocalSymbolData[i]); + WriteNlist(LocalSymbolData[i], Layout); for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - WriteNlist(ExternalSymbolData[i]); + WriteNlist(ExternalSymbolData[i], Layout); for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - WriteNlist(UndefinedSymbolData[i]); + WriteNlist(UndefinedSymbolData[i], Layout); // Write the string table. 
OS << StringTable.str(); @@ -1097,13 +1109,15 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { } void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, - const MCDataFragment &Fragment, + const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCAsmFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Fragment, Fixup, + ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); } -void MachObjectWriter::WriteObject(const MCAssembler &Asm) { - ((MachObjectWriterImpl*) Impl)->WriteObject(Asm); +void MachObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + ((MachObjectWriterImpl*) Impl)->WriteObject(Asm, Layout); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 8f860a6..485bf4d 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -65,7 +65,7 @@ namespace llvm { pow(5, power) is power * 815 / (351 * integerPartWidth) + 1 - + However, whilst the result may require only this many parts, because we are multiplying two values to get it, the multiplication may require an extra part with the excess part @@ -100,15 +100,15 @@ hexDigitValue(unsigned int c) unsigned int r; r = c - '0'; - if(r <= 9) + if (r <= 9) return r; r = c - 'A'; - if(r <= 5) + if (r <= 5) return r + 10; r = c - 'a'; - if(r <= 5) + if (r <= 5) return r + 10; return -1U; @@ -116,8 +116,8 @@ hexDigitValue(unsigned int c) static inline void assertArithmeticOK(const llvm::fltSemantics &semantics) { - assert(semantics.arithmeticOK - && "Compile-time arithmetic does not support these semantics"); + assert(semantics.arithmeticOK && + "Compile-time arithmetic does not support these semantics"); } /* Return the value of a decimal exponent of the form @@ -179,37 +179,37 @@ totalExponent(StringRef::iterator p, StringRef::iterator end, assert(p != end && "Exponent has no digits"); negative = *p == '-'; - if(*p == '-' 
|| *p == '+') { + if (*p == '-' || *p == '+') { p++; assert(p != end && "Exponent has no digits"); } unsignedExponent = 0; overflow = false; - for(; p != end; ++p) { + for (; p != end; ++p) { unsigned int value; value = decDigitValue(*p); assert(value < 10U && "Invalid character in exponent"); unsignedExponent = unsignedExponent * 10 + value; - if(unsignedExponent > 65535) + if (unsignedExponent > 65535) overflow = true; } - if(exponentAdjustment > 65535 || exponentAdjustment < -65536) + if (exponentAdjustment > 65535 || exponentAdjustment < -65536) overflow = true; - if(!overflow) { + if (!overflow) { exponent = unsignedExponent; - if(negative) + if (negative) exponent = -exponent; exponent += exponentAdjustment; - if(exponent > 65535 || exponent < -65536) + if (exponent > 65535 || exponent < -65536) overflow = true; } - if(overflow) + if (overflow) exponent = negative ? -65536: 65535; return exponent; @@ -221,15 +221,15 @@ skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, { StringRef::iterator p = begin; *dot = end; - while(*p == '0' && p != end) + while (*p == '0' && p != end) p++; - if(*p == '.') { + if (*p == '.') { *dot = p++; assert(end - begin != 1 && "Significand has no digits"); - while(*p == '0' && p != end) + while (*p == '0' && p != end) p++; } @@ -323,13 +323,13 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* If the first trailing digit isn't 0 or 8 we can work out the fraction immediately. */ - if(digitValue > 8) + if (digitValue > 8) return lfMoreThanHalf; - else if(digitValue < 8 && digitValue > 0) + else if (digitValue < 8 && digitValue > 0) return lfLessThanHalf; /* Otherwise we need to find the first non-zero digit. 
*/ - while(*p == '0') + while (*p == '0') p++; assert(p != end && "Invalid trailing hexadecimal fraction!"); @@ -338,7 +338,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* If we ran off the end it is exactly zero or one-half, otherwise a little more. */ - if(hexDigit == -1U) + if (hexDigit == -1U) return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; else return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; @@ -356,12 +356,12 @@ lostFractionThroughTruncation(const integerPart *parts, lsb = APInt::tcLSB(parts, partCount); /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ - if(bits <= lsb) + if (bits <= lsb) return lfExactlyZero; - if(bits == lsb + 1) + if (bits == lsb + 1) return lfExactlyHalf; - if(bits <= partCount * integerPartWidth - && APInt::tcExtractBit(parts, bits - 1)) + if (bits <= partCount * integerPartWidth && + APInt::tcExtractBit(parts, bits - 1)) return lfMoreThanHalf; return lfLessThanHalf; @@ -385,10 +385,10 @@ static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant) { - if(lessSignificant != lfExactlyZero) { - if(moreSignificant == lfExactlyZero) + if (lessSignificant != lfExactlyZero) { + if (moreSignificant == lfExactlyZero) moreSignificant = lfLessThanHalf; - else if(moreSignificant == lfExactlyHalf) + else if (moreSignificant == lfExactlyHalf) moreSignificant = lfMoreThanHalf; } @@ -468,7 +468,7 @@ powerOf5(integerPart *dst, unsigned int power) 15625, 78125 }; integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; pow5s[0] = 78125 * 5; - + unsigned int partsCount[16] = { 1 }; integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; unsigned int result; @@ -588,14 +588,14 @@ APFloat::initialize(const fltSemantics *ourSemantics) semantics = ourSemantics; count = partCount(); - if(count > 1) + if (count > 1) significand.parts = new integerPart[count]; } void APFloat::freeSignificand() { - if(partCount() > 1) + if (partCount() > 1) delete [] 
significand.parts; } @@ -609,7 +609,7 @@ APFloat::assign(const APFloat &rhs) exponent = rhs.exponent; sign2 = rhs.sign2; exponent2 = rhs.exponent2; - if(category == fcNormal || category == fcNaN) + if (category == fcNormal || category == fcNaN) copySignificand(rhs); } @@ -683,8 +683,8 @@ APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, APFloat & APFloat::operator=(const APFloat &rhs) { - if(this != &rhs) { - if(semantics != rhs.semantics) { + if (this != &rhs) { + if (semantics != rhs.semantics) { freeSignificand(); initialize(rhs.semantics); } @@ -881,7 +881,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) precision = semantics->precision; newPartsCount = partCountForBits(precision * 2); - if(newPartsCount > 4) + if (newPartsCount > 4) fullSignificand = new integerPart[newPartsCount]; else fullSignificand = scratch; @@ -896,7 +896,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; exponent += rhs.exponent; - if(addend) { + if (addend) { Significand savedSignificand = significand; const fltSemantics *savedSemantics = semantics; fltSemantics extendedSemantics; @@ -905,18 +905,17 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) /* Normalize our MSB. */ extendedPrecision = precision + precision - 1; - if(omsb != extendedPrecision) - { - APInt::tcShiftLeft(fullSignificand, newPartsCount, - extendedPrecision - omsb); - exponent -= extendedPrecision - omsb; - } + if (omsb != extendedPrecision) { + APInt::tcShiftLeft(fullSignificand, newPartsCount, + extendedPrecision - omsb); + exponent -= extendedPrecision - omsb; + } /* Create new semantics. 
*/ extendedSemantics = *semantics; extendedSemantics.precision = extendedPrecision; - if(newPartsCount == 1) + if (newPartsCount == 1) significand.part = fullSignificand[0]; else significand.parts = fullSignificand; @@ -928,7 +927,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) lost_fraction = addOrSubtractSignificand(extendedAddend, false); /* Restore our state. */ - if(newPartsCount == 1) + if (newPartsCount == 1) fullSignificand[0] = significand.part; significand = savedSignificand; semantics = savedSemantics; @@ -938,7 +937,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) exponent -= (precision - 1); - if(omsb > precision) { + if (omsb > precision) { unsigned int bits, significantParts; lostFraction lf; @@ -951,7 +950,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); - if(newPartsCount > 4) + if (newPartsCount > 4) delete [] fullSignificand; return lost_fraction; @@ -973,7 +972,7 @@ APFloat::divideSignificand(const APFloat &rhs) rhsSignificand = rhs.significandParts(); partsCount = partCount(); - if(partsCount > 2) + if (partsCount > 2) dividend = new integerPart[partsCount * 2]; else dividend = scratch; @@ -981,7 +980,7 @@ APFloat::divideSignificand(const APFloat &rhs) divisor = dividend + partsCount; /* Copy the dividend and divisor as they will be modified in-place. */ - for(i = 0; i < partsCount; i++) { + for (i = 0; i < partsCount; i++) { dividend[i] = lhsSignificand[i]; divisor[i] = rhsSignificand[i]; lhsSignificand[i] = 0; @@ -993,14 +992,14 @@ APFloat::divideSignificand(const APFloat &rhs) /* Normalize the divisor. */ bit = precision - APInt::tcMSB(divisor, partsCount) - 1; - if(bit) { + if (bit) { exponent += bit; APInt::tcShiftLeft(divisor, partsCount, bit); } /* Normalize the dividend. 
*/ bit = precision - APInt::tcMSB(dividend, partsCount) - 1; - if(bit) { + if (bit) { exponent -= bit; APInt::tcShiftLeft(dividend, partsCount, bit); } @@ -1008,15 +1007,15 @@ APFloat::divideSignificand(const APFloat &rhs) /* Ensure the dividend >= divisor initially for the loop below. Incidentally, this means that the division loop below is guaranteed to set the integer bit to one. */ - if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { + if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { exponent--; APInt::tcShiftLeft(dividend, partsCount, 1); assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); } /* Long division. */ - for(bit = precision; bit; bit -= 1) { - if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { + for (bit = precision; bit; bit -= 1) { + if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { APInt::tcSubtract(dividend, divisor, 0, partsCount); APInt::tcSetBit(lhsSignificand, bit - 1); } @@ -1027,16 +1026,16 @@ APFloat::divideSignificand(const APFloat &rhs) /* Figure out the lost fraction. */ int cmp = APInt::tcCompare(dividend, divisor, partsCount); - if(cmp > 0) + if (cmp > 0) lost_fraction = lfMoreThanHalf; - else if(cmp == 0) + else if (cmp == 0) lost_fraction = lfExactlyHalf; - else if(APInt::tcIsZero(dividend, partsCount)) + else if (APInt::tcIsZero(dividend, partsCount)) lost_fraction = lfExactlyZero; else lost_fraction = lfLessThanHalf; - if(partsCount > 2) + if (partsCount > 2) delete [] dividend; return lost_fraction; @@ -1072,7 +1071,7 @@ APFloat::shiftSignificandLeft(unsigned int bits) { assert(bits < semantics->precision); - if(bits) { + if (bits) { unsigned int partsCount = partCount(); APInt::tcShiftLeft(significandParts(), partsCount, bits); @@ -1095,13 +1094,13 @@ APFloat::compareAbsoluteValue(const APFloat &rhs) const /* If exponents are equal, do an unsigned bignum comparison of the significands. 
*/ - if(compare == 0) + if (compare == 0) compare = APInt::tcCompare(significandParts(), rhs.significandParts(), partCount()); - if(compare > 0) + if (compare > 0) return cmpGreaterThan; - else if(compare < 0) + else if (compare < 0) return cmpLessThan; else return cmpEqual; @@ -1113,14 +1112,13 @@ APFloat::opStatus APFloat::handleOverflow(roundingMode rounding_mode) { /* Infinity? */ - if(rounding_mode == rmNearestTiesToEven - || rounding_mode == rmNearestTiesToAway - || (rounding_mode == rmTowardPositive && !sign) - || (rounding_mode == rmTowardNegative && sign)) - { - category = fcInfinity; - return (opStatus) (opOverflow | opInexact); - } + if (rounding_mode == rmNearestTiesToEven || + rounding_mode == rmNearestTiesToAway || + (rounding_mode == rmTowardPositive && !sign) || + (rounding_mode == rmTowardNegative && sign)) { + category = fcInfinity; + return (opStatus) (opOverflow | opInexact); + } /* Otherwise we become the largest finite number. */ category = fcNormal; @@ -1155,11 +1153,11 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; case rmNearestTiesToEven: - if(lost_fraction == lfMoreThanHalf) + if (lost_fraction == lfMoreThanHalf) return true; /* Our zeroes don't have a significand to test. */ - if(lost_fraction == lfExactlyHalf && category != fcZero) + if (lost_fraction == lfExactlyHalf && category != fcZero) return APInt::tcExtractBit(significandParts(), bit); return false; @@ -1182,13 +1180,13 @@ APFloat::normalize(roundingMode rounding_mode, unsigned int omsb; /* One, not zero, based MSB. */ int exponentChange; - if(category != fcNormal) + if (category != fcNormal) return opOK; /* Before rounding normalize the exponent of fcNormal numbers. */ omsb = significandMSB() + 1; - if(omsb) { + if (omsb) { /* OMSB is numbered from 1. We want to place it in the integer bit numbered PRECISON if possible, with a compensating change in the exponent. 
*/ @@ -1196,16 +1194,16 @@ APFloat::normalize(roundingMode rounding_mode, /* If the resulting exponent is too high, overflow according to the rounding mode. */ - if(exponent + exponentChange > semantics->maxExponent) + if (exponent + exponentChange > semantics->maxExponent) return handleOverflow(rounding_mode); /* Subnormal numbers have exponent minExponent, and their MSB is forced based on that. */ - if(exponent + exponentChange < semantics->minExponent) + if (exponent + exponentChange < semantics->minExponent) exponentChange = semantics->minExponent - exponent; /* Shifting left is easy as we don't lose precision. */ - if(exponentChange < 0) { + if (exponentChange < 0) { assert(lost_fraction == lfExactlyZero); shiftSignificandLeft(-exponentChange); @@ -1213,7 +1211,7 @@ APFloat::normalize(roundingMode rounding_mode, return opOK; } - if(exponentChange > 0) { + if (exponentChange > 0) { lostFraction lf; /* Shift right and capture any new lost fraction. */ @@ -1222,7 +1220,7 @@ APFloat::normalize(roundingMode rounding_mode, lost_fraction = combineLostFractions(lf, lost_fraction); /* Keep OMSB up-to-date. */ - if(omsb > (unsigned) exponentChange) + if (omsb > (unsigned) exponentChange) omsb -= exponentChange; else omsb = 0; @@ -1234,28 +1232,28 @@ APFloat::normalize(roundingMode rounding_mode, /* As specified in IEEE 754, since we do not trap we do not report underflow for exact results. */ - if(lost_fraction == lfExactlyZero) { + if (lost_fraction == lfExactlyZero) { /* Canonicalize zeroes. */ - if(omsb == 0) + if (omsb == 0) category = fcZero; return opOK; } /* Increment the significand if we're rounding away from zero. */ - if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { - if(omsb == 0) + if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { + if (omsb == 0) exponent = semantics->minExponent; incrementSignificand(); omsb = significandMSB() + 1; /* Did the significand increment overflow? 
*/ - if(omsb == (unsigned) semantics->precision + 1) { + if (omsb == (unsigned) semantics->precision + 1) { /* Renormalize by incrementing the exponent and shifting our significand right one. However if we already have the maximum exponent we overflow to infinity. */ - if(exponent == semantics->maxExponent) { + if (exponent == semantics->maxExponent) { category = fcInfinity; return (opStatus) (opOverflow | opInexact); @@ -1269,14 +1267,14 @@ APFloat::normalize(roundingMode rounding_mode, /* The normal case - we were and are not denormal, and any significand increment above didn't overflow. */ - if(omsb == semantics->precision) + if (omsb == semantics->precision) return opInexact; /* We have a non-zero denormal. */ assert(omsb < semantics->precision); /* Canonicalize zeroes. */ - if(omsb == 0) + if (omsb == 0) category = fcZero; /* The fcZero case is a denormal that underflowed to zero. */ @@ -1324,7 +1322,7 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) case convolve(fcInfinity, fcInfinity): /* Differently signed infinities can only be validly subtracted. */ - if(((sign ^ rhs.sign)!=0) != subtract) { + if (((sign ^ rhs.sign)!=0) != subtract) { makeNaN(); return opInvalidOp; } @@ -1352,7 +1350,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) bits = exponent - rhs.exponent; /* Subtraction is more subtle than one might naively expect. */ - if(subtract) { + if (subtract) { APFloat temp_rhs(rhs); bool reverse; @@ -1381,16 +1379,16 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) /* Invert the lost fraction - it was on the RHS and subtracted. */ - if(lost_fraction == lfLessThanHalf) + if (lost_fraction == lfLessThanHalf) lost_fraction = lfMoreThanHalf; - else if(lost_fraction == lfMoreThanHalf) + else if (lost_fraction == lfMoreThanHalf) lost_fraction = lfLessThanHalf; /* The code above is intended to ensure that no borrow is necessary. 
*/ assert(!carry); } else { - if(bits > 0) { + if (bits > 0) { APFloat temp_rhs(rhs); lost_fraction = temp_rhs.shiftSignificandRight(bits); @@ -1561,7 +1559,7 @@ APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, fs = addOrSubtractSpecials(rhs, subtract); /* This return code means it was not a simple case. */ - if(fs == opDivByZero) { + if (fs == opDivByZero) { lostFraction lost_fraction; lost_fraction = addOrSubtractSignificand(rhs, subtract); @@ -1574,8 +1572,8 @@ APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a positive zero unless rounding to minus infinity, except that adding two like-signed zeroes gives that zero. */ - if(category == fcZero) { - if(rhs.category != fcZero || (sign == rhs.sign) == subtract) + if (category == fcZero) { + if (rhs.category != fcZero || (sign == rhs.sign) == subtract) sign = (rounding_mode == rmTowardNegative); } @@ -1606,10 +1604,10 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) sign ^= rhs.sign; fs = multiplySpecials(rhs); - if(category == fcNormal) { + if (category == fcNormal) { lostFraction lost_fraction = multiplySignificand(rhs, 0); fs = normalize(rounding_mode, lost_fraction); - if(lost_fraction != lfExactlyZero) + if (lost_fraction != lfExactlyZero) fs = (opStatus) (fs | opInexact); } @@ -1626,10 +1624,10 @@ APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) sign ^= rhs.sign; fs = divideSpecials(rhs); - if(category == fcNormal) { + if (category == fcNormal) { lostFraction lost_fraction = divideSignificand(rhs); fs = normalize(rounding_mode, lost_fraction); - if(lost_fraction != lfExactlyZero) + if (lost_fraction != lfExactlyZero) fs = (opStatus) (fs | opInexact); } @@ -1673,7 +1671,7 @@ APFloat::remainder(const APFloat &rhs) return fs; } -/* Normalized llvm frem (C fmod). +/* Normalized llvm frem (C fmod). This is not currently correct in all cases. 
*/ APFloat::opStatus APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) @@ -1730,20 +1728,20 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, /* If and only if all arguments are normal do we need to do an extended-precision calculation. */ - if(category == fcNormal - && multiplicand.category == fcNormal - && addend.category == fcNormal) { + if (category == fcNormal && + multiplicand.category == fcNormal && + addend.category == fcNormal) { lostFraction lost_fraction; lost_fraction = multiplySignificand(multiplicand, &addend); fs = normalize(rounding_mode, lost_fraction); - if(lost_fraction != lfExactlyZero) + if (lost_fraction != lfExactlyZero) fs = (opStatus) (fs | opInexact); /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a positive zero unless rounding to minus infinity, except that adding two like-signed zeroes gives that zero. */ - if(category == fcZero && sign != addend.sign) + if (category == fcZero && sign != addend.sign) sign = (rounding_mode == rmTowardNegative); } else { fs = multiplySpecials(multiplicand); @@ -1755,7 +1753,7 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, If we need to do the addition we can do so with normal precision. 
*/ - if(fs == opOK) + if (fs == opOK) fs = addOrSubtract(addend, rounding_mode, false); } @@ -1787,7 +1785,7 @@ APFloat::compare(const APFloat &rhs) const case convolve(fcInfinity, fcNormal): case convolve(fcInfinity, fcZero): case convolve(fcNormal, fcZero): - if(sign) + if (sign) return cmpLessThan; else return cmpGreaterThan; @@ -1795,15 +1793,15 @@ APFloat::compare(const APFloat &rhs) const case convolve(fcNormal, fcInfinity): case convolve(fcZero, fcInfinity): case convolve(fcZero, fcNormal): - if(rhs.sign) + if (rhs.sign) return cmpGreaterThan; else return cmpLessThan; case convolve(fcInfinity, fcInfinity): - if(sign == rhs.sign) + if (sign == rhs.sign) return cmpEqual; - else if(sign) + else if (sign) return cmpLessThan; else return cmpGreaterThan; @@ -1816,8 +1814,8 @@ APFloat::compare(const APFloat &rhs) const } /* Two normal numbers. Do they have the same sign? */ - if(sign != rhs.sign) { - if(sign) + if (sign != rhs.sign) { + if (sign) result = cmpLessThan; else result = cmpGreaterThan; @@ -1825,10 +1823,10 @@ APFloat::compare(const APFloat &rhs) const /* Compare absolute values; invert result if negative. */ result = compareAbsoluteValue(rhs); - if(sign) { - if(result == cmpLessThan) + if (sign) { + if (result == cmpLessThan) result = cmpGreaterThan; - else if(result == cmpGreaterThan) + else if (result == cmpGreaterThan) result = cmpLessThan; } } @@ -1886,7 +1884,7 @@ APFloat::convert(const fltSemantics &toSemantics, } } - if(category == fcNormal) { + if (category == fcNormal) { /* Re-interpret our bit-pattern. */ exponent += toSemantics.precision - semantics->precision; semantics = &toSemantics; @@ -1911,7 +1909,7 @@ APFloat::convert(const fltSemantics &toSemantics, // x87 long double). 
if (APInt::tcLSB(significandParts(), newPartCount) < ushift) *losesInfo = true; - if (oldSemantics == &APFloat::x87DoubleExtended && + if (oldSemantics == &APFloat::x87DoubleExtended && (!(*significandParts() & 0x8000000000000000ULL) || !(*significandParts() & 0x4000000000000000ULL))) *losesInfo = true; @@ -1956,12 +1954,12 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, *isExact = false; /* Handle the three special cases first. */ - if(category == fcInfinity || category == fcNaN) + if (category == fcInfinity || category == fcNaN) return opInvalidOp; dstPartsCount = partCountForBits(width); - if(category == fcZero) { + if (category == fcZero) { APInt::tcSet(parts, 0, dstPartsCount); // Negative zero can't be represented as an int. *isExact = !sign; @@ -2004,8 +2002,8 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, if (truncatedBits) { lost_fraction = lostFractionThroughTruncation(src, partCount(), truncatedBits); - if (lost_fraction != lfExactlyZero - && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { + if (lost_fraction != lfExactlyZero && + roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { if (APInt::tcIncrement(parts, dstPartsCount)) return opInvalidOp; /* Overflow. */ } @@ -2062,7 +2060,7 @@ APFloat::convertToInteger(integerPart *parts, unsigned int width, { opStatus fs; - fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, + fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, isExact); if (fs == opInvalidOp) { @@ -2149,8 +2147,8 @@ APFloat::convertFromSignExtendedInteger(const integerPart *src, opStatus status; assertArithmeticOK(*semantics); - if (isSigned - && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { + if (isSigned && + APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { integerPart *copy; /* If we're signed and negative negate a copy. 
*/ @@ -2178,7 +2176,7 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts, APInt api = APInt(width, partCount, parts); sign = false; - if(isSigned && APInt::tcExtractBit(parts, width - 1)) { + if (isSigned && APInt::tcExtractBit(parts, width - 1)) { sign = true; api = -api; } @@ -2209,10 +2207,10 @@ APFloat::convertFromHexadecimalString(const StringRef &s, StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); firstSignificantDigit = p; - for(; p != end;) { + for (; p != end;) { integerPart hex_value; - if(*p == '.') { + if (*p == '.') { assert(dot == end && "String contains multiple dots"); dot = p++; if (p == end) { @@ -2221,7 +2219,7 @@ APFloat::convertFromHexadecimalString(const StringRef &s, } hex_value = hexDigitValue(*p); - if(hex_value == -1U) { + if (hex_value == -1U) { break; } @@ -2231,13 +2229,13 @@ APFloat::convertFromHexadecimalString(const StringRef &s, break; } else { /* Store the number whilst 4-bit nibbles remain. */ - if(bitPos) { + if (bitPos) { bitPos -= 4; hex_value <<= bitPos % integerPartWidth; significand[bitPos / integerPartWidth] |= hex_value; } else { lost_fraction = trailingHexadecimalFraction(p, end, hex_value); - while(p != end && hexDigitValue(*p) != -1U) + while (p != end && hexDigitValue(*p) != -1U) p++; break; } @@ -2251,7 +2249,7 @@ APFloat::convertFromHexadecimalString(const StringRef &s, assert((dot == end || p - begin != 1) && "Significand has no digits"); /* Ignore the exponent if we are zero. */ - if(p != firstSignificantDigit) { + if (p != firstSignificantDigit) { int expAdjustment; /* Implicit hexadecimal point? */ @@ -2261,7 +2259,7 @@ APFloat::convertFromHexadecimalString(const StringRef &s, /* Calculate the exponent adjustment implicit in the number of significant digits. 
*/ expAdjustment = static_cast<int>(dot - firstSignificantDigit); - if(expAdjustment < 0) + if (expAdjustment < 0) expAdjustment++; expAdjustment = expAdjustment * 4 - 1; @@ -2287,8 +2285,8 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, integerPart pow5Parts[maxPowerOfFiveParts]; bool isNearest; - isNearest = (rounding_mode == rmNearestTiesToEven - || rounding_mode == rmNearestTiesToAway); + isNearest = (rounding_mode == rmNearestTiesToEven || + rounding_mode == rmNearestTiesToAway); parts = partCountForBits(semantics->precision + 11); @@ -2482,13 +2480,13 @@ APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode) StringRef::iterator p = str.begin(); size_t slen = str.size(); sign = *p == '-' ? 1 : 0; - if(*p == '-' || *p == '+') { + if (*p == '-' || *p == '+') { p++; slen--; assert(slen && "String has no digits"); } - if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { assert(slen - 2 && "Invalid string"); return convertFromHexadecimalString(StringRef(p + 2, slen - 2), rounding_mode); @@ -3013,7 +3011,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) // exponent2 and significand2 are required to be 0; we don't check category = fcInfinity; } else if (myexponent==0x7ff && mysignificand!=0) { - // exponent meaningless. So is the whole second word, but keep it + // exponent meaningless. So is the whole second word, but keep it // for determinism. 
category = fcNaN; exponent2 = myexponent2; @@ -3031,7 +3029,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) exponent = -1022; else significandParts()[0] |= 0x10000000000000LL; // integer bit - if (myexponent2==0) + if (myexponent2==0) exponent2 = -1022; else significandParts()[1] |= 0x10000000000000LL; // integer bit @@ -3217,8 +3215,8 @@ APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { significand[i] = ~((integerPart) 0); // ...and then clear the top bits for internal consistency. - significand[N-1] - &= (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1; + significand[N-1] &= + (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1; return Val; } @@ -3247,8 +3245,8 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { Val.exponent = Sem.minExponent; Val.zeroSignificand(); - Val.significandParts()[partCountForBits(Sem.precision)-1] - |= (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)); + Val.significandParts()[partCountForBits(Sem.precision)-1] |= + (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)); return Val; } @@ -3433,7 +3431,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str, // log2(N * 5^e) == log2(N) + e * log2(5) // <= semantics->precision + e * 137 / 59 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) - + unsigned precision = semantics->precision + 137 * texp / 59; // Multiply significand by 5^e. 
@@ -3442,7 +3440,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str, APInt five_to_the_i(precision, 5); while (true) { if (texp & 1) significand *= five_to_the_i; - + texp >>= 1; if (!texp) break; five_to_the_i *= five_to_the_i; diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 6a6384a..50025d2 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -702,15 +702,14 @@ static inline uint32_t hashword(const uint64_t *k64, size_t length) a = b = c = 0xdeadbeef + (((uint32_t)length)<<2); /*------------------------------------------------- handle most of the key */ - while (length > 3) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 3; - k += 3; - } + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } /*------------------------------------------- handle the last 3 uint32_t's */ switch (length) { /* all the case statements fall through */ @@ -1383,8 +1382,8 @@ APInt APInt::sqrt() const { // libc sqrt function which will probably use a hardware sqrt computation. // This should be faster than the algorithm below. if (magnitude < 52) { -#ifdef _MSC_VER - // Amazingly, VC++ doesn't have round(). +#if defined( _MSC_VER ) || defined(_MINIX) + // Amazingly, VC++ and Minix don't have round(). 
return APInt(BitWidth, uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5); #else @@ -2065,8 +2064,8 @@ void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) { assert((slen <= numbits || radix != 2) && "Insufficient bit width"); assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); - assert((((slen-1)*64)/22 <= numbits || radix != 10) - && "Insufficient bit width"); + assert((((slen-1)*64)/22 <= numbits || radix != 10) && + "Insufficient bit width"); // Allocate memory if (!isSingleWord()) @@ -2229,7 +2228,7 @@ namespace { static inline integerPart lowBitMask(unsigned int bits) { - assert (bits != 0 && bits <= integerPartWidth); + assert(bits != 0 && bits <= integerPartWidth); return ~(integerPart) 0 >> (integerPartWidth - bits); } @@ -2306,10 +2305,10 @@ APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts) { unsigned int i; - assert (parts > 0); + assert(parts > 0); dst[0] = part; - for(i = 1; i < parts; i++) + for (i = 1; i < parts; i++) dst[i] = 0; } @@ -2319,7 +2318,7 @@ APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts) { unsigned int i; - for(i = 0; i < parts; i++) + for (i = 0; i < parts; i++) dst[i] = src[i]; } @@ -2329,7 +2328,7 @@ APInt::tcIsZero(const integerPart *src, unsigned int parts) { unsigned int i; - for(i = 0; i < parts; i++) + for (i = 0; i < parts; i++) if (src[i]) return false; @@ -2340,8 +2339,8 @@ APInt::tcIsZero(const integerPart *src, unsigned int parts) int APInt::tcExtractBit(const integerPart *parts, unsigned int bit) { - return(parts[bit / integerPartWidth] - & ((integerPart) 1 << bit % integerPartWidth)) != 0; + return (parts[bit / integerPartWidth] & + ((integerPart) 1 << bit % integerPartWidth)) != 0; } /* Set the given bit of a bignum. 
*/ @@ -2366,7 +2365,7 @@ APInt::tcLSB(const integerPart *parts, unsigned int n) { unsigned int i, lsb; - for(i = 0; i < n; i++) { + for (i = 0; i < n; i++) { if (parts[i] != 0) { lsb = partLSB(parts[i]); @@ -2385,13 +2384,13 @@ APInt::tcMSB(const integerPart *parts, unsigned int n) unsigned int msb; do { - --n; + --n; - if (parts[n] != 0) { - msb = partMSB(parts[n]); + if (parts[n] != 0) { + msb = partMSB(parts[n]); - return msb + n * integerPartWidth; - } + return msb + n * integerPartWidth; + } } while (n); return -1U; @@ -2408,7 +2407,7 @@ APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src, unsigned int firstSrcPart, dstParts, shift, n; dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth; - assert (dstParts <= dstCount); + assert(dstParts <= dstCount); firstSrcPart = srcLSB / integerPartWidth; tcAssign (dst, src + firstSrcPart, dstParts); @@ -2443,7 +2442,7 @@ APInt::tcAdd(integerPart *dst, const integerPart *rhs, assert(c <= 1); - for(i = 0; i < parts; i++) { + for (i = 0; i < parts; i++) { integerPart l; l = dst[i]; @@ -2468,7 +2467,7 @@ APInt::tcSubtract(integerPart *dst, const integerPart *rhs, assert(c <= 1); - for(i = 0; i < parts; i++) { + for (i = 0; i < parts; i++) { integerPart l; l = dst[i]; @@ -2518,7 +2517,7 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src, /* N loops; minimum of dstParts and srcParts. */ n = dstParts < srcParts ? dstParts: srcParts; - for(i = 0; i < n; i++) { + for (i = 0; i < n; i++) { integerPart low, mid, high, srcPart; /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. @@ -2583,7 +2582,7 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src, non-zero. This is true if any remaining src parts are non-zero and the multiplier is non-zero. 
*/ if (multiplier) - for(; i < srcParts; i++) + for (; i < srcParts; i++) if (src[i]) return 1; @@ -2608,7 +2607,7 @@ APInt::tcMultiply(integerPart *dst, const integerPart *lhs, overflow = 0; tcSet(dst, 0, parts); - for(i = 0; i < parts; i++) + for (i = 0; i < parts; i++) overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, parts - i, true); @@ -2634,7 +2633,7 @@ APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs, tcSet(dst, 0, rhsParts); - for(n = 0; n < lhsParts; n++) + for (n = 0; n < lhsParts; n++) tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true); n = lhsParts + rhsParts; @@ -2678,7 +2677,7 @@ APInt::tcDivide(integerPart *lhs, const integerPart *rhs, /* Loop, subtracting SRHS if REMAINDER is greater and adding that to the total. */ - for(;;) { + for (;;) { int compare; compare = tcCompare(remainder, srhs, parts); @@ -2746,7 +2745,7 @@ APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count) /* Perform the shift. This leaves the most significant COUNT bits of the result at zero. 
*/ - for(i = 0; i < parts; i++) { + for (i = 0; i < parts; i++) { integerPart part; if (i + jump >= parts) { @@ -2771,7 +2770,7 @@ APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts) { unsigned int i; - for(i = 0; i < parts; i++) + for (i = 0; i < parts; i++) dst[i] &= rhs[i]; } @@ -2781,7 +2780,7 @@ APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts) { unsigned int i; - for(i = 0; i < parts; i++) + for (i = 0; i < parts; i++) dst[i] |= rhs[i]; } @@ -2791,7 +2790,7 @@ APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts) { unsigned int i; - for(i = 0; i < parts; i++) + for (i = 0; i < parts; i++) dst[i] ^= rhs[i]; } @@ -2801,7 +2800,7 @@ APInt::tcComplement(integerPart *dst, unsigned int parts) { unsigned int i; - for(i = 0; i < parts; i++) + for (i = 0; i < parts; i++) dst[i] = ~dst[i]; } @@ -2830,7 +2829,7 @@ APInt::tcIncrement(integerPart *dst, unsigned int parts) { unsigned int i; - for(i = 0; i < parts; i++) + for (i = 0; i < parts; i++) if (++dst[i] != 0) break; diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 2ab4103..d31f34e 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -676,8 +676,8 @@ void cl::ParseCommandLineOptions(int argc, char **argv, << " positional arguments: See: " << argv[0] << " -help\n"; ErrorParsing = true; - } else if (!HasUnlimitedPositionals - && PositionalVals.size() > PositionalOpts.size()) { + } else if (!HasUnlimitedPositionals && + PositionalVals.size() > PositionalOpts.size()) { errs() << ProgramName << ": Too many positional arguments specified!\n" << "Can specify at most " << PositionalOpts.size() diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index 82b4b8c..eccfa0b 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -64,8 +64,7 @@ DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), cl::location(DebugOnlyOptLoc), cl::ValueRequired); // Signal handlers - dump 
debug output on termination. -static void debug_user_sig_handler(void *Cookie) -{ +static void debug_user_sig_handler(void *Cookie) { // This is a bit sneaky. Since this is under #ifndef NDEBUG, we // know that debug mode is enabled and dbgs() really is a // circular_raw_ostream. If NDEBUG is defined, then dbgs() == diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 8bb1566..4412cb2 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines an API for error handling, it supersedes cerr+abort(), and +// This file defines an API for error handling, it supersedes cerr+abort(), and // cerr+exit() style error handling. // Callbacks can be registered for these errors through this API. //===----------------------------------------------------------------------===// @@ -57,7 +57,7 @@ void llvm_report_error(const Twine &reason) { exit(1); } -void llvm_unreachable_internal(const char *msg, const char *file, +void llvm_unreachable_internal(const char *msg, const char *file, unsigned line) { // This code intentionally doesn't call the ErrorHandler callback, because // llvm_unreachable is intended to be used to indicate "impossible" @@ -71,4 +71,3 @@ void llvm_unreachable_internal(const char *msg, const char *file, abort(); } } - diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 345a78c..4f135ea 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" +#include "llvm/System/Errno.h" #include "llvm/System/Path.h" #include "llvm/System/Process.h" #include "llvm/System/Program.h" @@ -167,6 +168,14 @@ public: sys::Path::UnMapFilePages(getBufferStart(), getBufferSize()); } }; + +/// FileCloser - RAII object to make sure an FD gets closed 
properly. +class FileCloser { + int FD; +public: + FileCloser(int FD) : FD(FD) {} + ~FileCloser() { ::close(FD); } +}; } MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, @@ -178,9 +187,10 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, SmallString<256> PathBuf(Filename.begin(), Filename.end()); int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags); if (FD == -1) { - if (ErrStr) *ErrStr = strerror(errno); + if (ErrStr) *ErrStr = sys::StrError(); return 0; } + FileCloser FC(FD); // Close FD on return. // If we don't know the file size, use fstat to find out. fstat on an open // file descriptor is cheaper than stat on a random path. @@ -190,8 +200,7 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, // TODO: This should use fstat64 when available. if (fstat(FD, FileInfoPtr) == -1) { - if (ErrStr) *ErrStr = strerror(errno); - ::close(FD); + if (ErrStr) *ErrStr = sys::StrError(); return 0; } FileSize = FileInfoPtr->st_size; @@ -208,7 +217,6 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, (FileSize & (sys::Process::GetPageSize()-1)) != 0) { if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) { // Close the file descriptor, now that the whole file is in memory. - ::close(FD); return new MemoryBufferMMapFile(Filename, Pages, FileSize); } } @@ -217,30 +225,31 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, if (!Buf) { // Failed to create a buffer. if (ErrStr) *ErrStr = "could not allocate buffer"; - ::close(FD); return 0; } OwningPtr<MemoryBuffer> SB(Buf); char *BufPtr = const_cast<char*>(SB->getBufferStart()); - + size_t BytesLeft = FileSize; while (BytesLeft) { ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); - if (NumRead > 0) { - BytesLeft -= NumRead; - BufPtr += NumRead; - } else if (NumRead == -1 && errno == EINTR) { - // try again - } else { - // error reading. 
- if (ErrStr) *ErrStr = strerror(errno); - close(FD); + if (NumRead == -1) { + if (errno == EINTR) + continue; + // Error while reading. + if (ErrStr) *ErrStr = sys::StrError(); return 0; + } else if (NumRead == 0) { + // We hit EOF early, truncate and terminate buffer. + Buf->BufferEnd = BufPtr; + *BufPtr = 0; + return SB.take(); } + BytesLeft -= NumRead; + BufPtr += NumRead; } - close(FD); - + return SB.take(); } diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index e787670..7d5f65a 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -32,8 +32,8 @@ #include <cstring> using namespace llvm; -// GetLibSupportInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); } +// CreateInfoOutputFile - Return a file stream to print our output on. +namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } /// -stats - Command line option to cause transformations to emit stats about /// what they did. @@ -48,6 +48,8 @@ namespace { /// llvm_shutdown is called. We print statistics from the destructor. class StatisticInfo { std::vector<const Statistic*> Stats; + friend void llvm::PrintStatistics(); + friend void llvm::PrintStatistics(raw_ostream &OS); public: ~StatisticInfo(); @@ -92,42 +94,55 @@ struct NameCompare { // Print information when destroyed, iff command line option is specified. StatisticInfo::~StatisticInfo() { - // Statistics not enabled? - if (Stats.empty()) return; + llvm::PrintStatistics(); +} - // Get the stream to write to. - raw_ostream &OutStream = *GetLibSupportInfoOutputFile(); +void llvm::EnableStatistics() { + Enabled.setValue(true); +} + +void llvm::PrintStatistics(raw_ostream &OS) { + StatisticInfo &Stats = *StatInfo; // Figure out how long the biggest Value and Name fields are. 
unsigned MaxNameLen = 0, MaxValLen = 0; - for (size_t i = 0, e = Stats.size(); i != e; ++i) { + for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) { MaxValLen = std::max(MaxValLen, - (unsigned)utostr(Stats[i]->getValue()).size()); + (unsigned)utostr(Stats.Stats[i]->getValue()).size()); MaxNameLen = std::max(MaxNameLen, - (unsigned)std::strlen(Stats[i]->getName())); + (unsigned)std::strlen(Stats.Stats[i]->getName())); } // Sort the fields by name. - std::stable_sort(Stats.begin(), Stats.end(), NameCompare()); + std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare()); // Print out the statistics header... - OutStream << "===" << std::string(73, '-') << "===\n" - << " ... Statistics Collected ...\n" - << "===" << std::string(73, '-') << "===\n\n"; + OS << "===" << std::string(73, '-') << "===\n" + << " ... Statistics Collected ...\n" + << "===" << std::string(73, '-') << "===\n\n"; // Print all of the statistics. - for (size_t i = 0, e = Stats.size(); i != e; ++i) { - std::string CountStr = utostr(Stats[i]->getValue()); - OutStream << std::string(MaxValLen-CountStr.size(), ' ') - << CountStr << " " << Stats[i]->getName() - << std::string(MaxNameLen-std::strlen(Stats[i]->getName()), ' ') - << " - " << Stats[i]->getDesc() << "\n"; - + for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) { + std::string CountStr = utostr(Stats.Stats[i]->getValue()); + OS << std::string(MaxValLen-CountStr.size(), ' ') + << CountStr << " " << Stats.Stats[i]->getName() + << std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ') + << " - " << Stats.Stats[i]->getDesc() << "\n"; } - OutStream << '\n'; // Flush the output stream... - OutStream.flush(); - - if (&OutStream != &outs() && &OutStream != &errs() && &OutStream != &dbgs()) - delete &OutStream; // Close the file. + OS << '\n'; // Flush the output stream. + OS.flush(); + +} + +void llvm::PrintStatistics() { + StatisticInfo &Stats = *StatInfo; + + // Statistics not enabled? 
+ if (Stats.Stats.empty()) return; + + // Get the stream to write to. + raw_ostream &OutStream = *CreateInfoOutputFile(); + PrintStatistics(OutStream); + delete &OutStream; // Close the file. } diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 4bdfac2..4fac073 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -11,20 +11,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" +#include "llvm/System/Mutex.h" #include "llvm/System/Process.h" -#include <algorithm> -#include <functional> -#include <map> +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringMap.h" using namespace llvm; -// GetLibSupportInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); } +// CreateInfoOutputFile - Return a file stream to print our output on. +namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } // getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy // of constructor/destructor ordering being unspecified by C++. Basically the @@ -53,117 +53,103 @@ namespace { cl::Hidden, cl::location(getLibSupportInfoOutputFilename())); } +// CreateInfoOutputFile - Return a file stream to print our output on. +raw_ostream *llvm::CreateInfoOutputFile() { + const std::string &OutputFilename = getLibSupportInfoOutputFilename(); + if (OutputFilename.empty()) + return new raw_fd_ostream(2, false); // stderr. + if (OutputFilename == "-") + return new raw_fd_ostream(1, false); // stdout. 
+ + std::string Error; + raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(), + Error, raw_fd_ostream::F_Append); + if (Error.empty()) + return Result; + + errs() << "Error opening info-output-file '" + << OutputFilename << " for appending!\n"; + delete Result; + return new raw_fd_ostream(2, false); // stderr. +} + + static TimerGroup *DefaultTimerGroup = 0; static TimerGroup *getDefaultTimerGroup() { - TimerGroup* tmp = DefaultTimerGroup; + TimerGroup *tmp = DefaultTimerGroup; sys::MemoryFence(); + if (tmp) return tmp; + + llvm_acquire_global_lock(); + tmp = DefaultTimerGroup; if (!tmp) { - llvm_acquire_global_lock(); - tmp = DefaultTimerGroup; - if (!tmp) { - tmp = new TimerGroup("Miscellaneous Ungrouped Timers"); - sys::MemoryFence(); - DefaultTimerGroup = tmp; - } - llvm_release_global_lock(); + tmp = new TimerGroup("Miscellaneous Ungrouped Timers"); + sys::MemoryFence(); + DefaultTimerGroup = tmp; } + llvm_release_global_lock(); return tmp; } -Timer::Timer(const std::string &N) - : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N), - Started(false), TG(getDefaultTimerGroup()) { - TG->addTimer(); -} - -Timer::Timer(const std::string &N, TimerGroup &tg) - : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N), - Started(false), TG(&tg) { - TG->addTimer(); -} +//===----------------------------------------------------------------------===// +// Timer Implementation +//===----------------------------------------------------------------------===// -Timer::Timer(const Timer &T) { - TG = T.TG; - if (TG) TG->addTimer(); - operator=(T); +void Timer::init(StringRef N) { + assert(TG == 0 && "Timer already initialized"); + Name.assign(N.begin(), N.end()); + Started = false; + TG = getDefaultTimerGroup(); + TG->addTimer(*this); } - -// Copy ctor, initialize with no TG member. 
-Timer::Timer(bool, const Timer &T) { - TG = T.TG; // Avoid assertion in operator= - operator=(T); // Copy contents - TG = 0; +void Timer::init(StringRef N, TimerGroup &tg) { + assert(TG == 0 && "Timer already initialized"); + Name.assign(N.begin(), N.end()); + Started = false; + TG = &tg; + TG->addTimer(*this); } - Timer::~Timer() { - if (TG) { - if (Started) { - Started = false; - TG->addTimerToPrint(*this); - } - TG->removeTimer(); - } + if (!TG) return; // Never initialized, or already cleared. + TG->removeTimer(*this); } static inline size_t getMemUsage() { - if (TrackSpace) - return sys::Process::GetMallocUsage(); - return 0; + if (!TrackSpace) return 0; + return sys::Process::GetMallocUsage(); } -struct TimeRecord { - double Elapsed, UserTime, SystemTime; - ssize_t MemUsed; -}; - -static TimeRecord getTimeRecord(bool Start) { +TimeRecord TimeRecord::getCurrentTime(bool Start) { TimeRecord Result; - - sys::TimeValue now(0,0); - sys::TimeValue user(0,0); - sys::TimeValue sys(0,0); - - ssize_t MemUsed = 0; + sys::TimeValue now(0,0), user(0,0), sys(0,0); + if (Start) { - MemUsed = getMemUsage(); - sys::Process::GetTimeUsage(now,user,sys); + Result.MemUsed = getMemUsage(); + sys::Process::GetTimeUsage(now, user, sys); } else { - sys::Process::GetTimeUsage(now,user,sys); - MemUsed = getMemUsage(); + sys::Process::GetTimeUsage(now, user, sys); + Result.MemUsed = getMemUsage(); } - Result.Elapsed = now.seconds() + now.microseconds() / 1000000.0; - Result.UserTime = user.seconds() + user.microseconds() / 1000000.0; - Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0; - Result.MemUsed = MemUsed; - + Result.WallTime = now.seconds() + now.microseconds() / 1000000.0; + Result.UserTime = user.seconds() + user.microseconds() / 1000000.0; + Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0; return Result; } static ManagedStatic<std::vector<Timer*> > ActiveTimers; void Timer::startTimer() { - sys::SmartScopedLock<true> L(*TimerLock); Started 
= true; ActiveTimers->push_back(this); - TimeRecord TR = getTimeRecord(true); - Elapsed -= TR.Elapsed; - UserTime -= TR.UserTime; - SystemTime -= TR.SystemTime; - MemUsed -= TR.MemUsed; - PeakMemBase = TR.MemUsed; + Time -= TimeRecord::getCurrentTime(true); } void Timer::stopTimer() { - sys::SmartScopedLock<true> L(*TimerLock); - TimeRecord TR = getTimeRecord(false); - Elapsed += TR.Elapsed; - UserTime += TR.UserTime; - SystemTime += TR.SystemTime; - MemUsed += TR.MemUsed; + Time += TimeRecord::getCurrentTime(false); if (ActiveTimers->back() == this) { ActiveTimers->pop_back(); @@ -175,217 +161,223 @@ void Timer::stopTimer() { } } -void Timer::sum(const Timer &T) { - Elapsed += T.Elapsed; - UserTime += T.UserTime; - SystemTime += T.SystemTime; - MemUsed += T.MemUsed; - PeakMem += T.PeakMem; +static void printVal(double Val, double Total, raw_ostream &OS) { + if (Total < 1e-7) // Avoid dividing by zero. + OS << " ----- "; + else { + OS << " " << format("%7.4f", Val) << " ("; + OS << format("%5.1f", Val*100/Total) << "%)"; + } } -/// addPeakMemoryMeasurement - This method should be called whenever memory -/// usage needs to be checked. 
It adds a peak memory measurement to the -/// currently active timers, which will be printed when the timer group prints -/// -void Timer::addPeakMemoryMeasurement() { - sys::SmartScopedLock<true> L(*TimerLock); - size_t MemUsed = getMemUsage(); - - for (std::vector<Timer*>::iterator I = ActiveTimers->begin(), - E = ActiveTimers->end(); I != E; ++I) - (*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase); +void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const { + if (Total.getUserTime()) + printVal(getUserTime(), Total.getUserTime(), OS); + if (Total.getSystemTime()) + printVal(getSystemTime(), Total.getSystemTime(), OS); + if (Total.getProcessTime()) + printVal(getProcessTime(), Total.getProcessTime(), OS); + printVal(getWallTime(), Total.getWallTime(), OS); + + OS << " "; + + if (Total.getMemUsed()) + OS << format("%9lld", (long long)getMemUsed()) << " "; } + //===----------------------------------------------------------------------===// // NamedRegionTimer Implementation //===----------------------------------------------------------------------===// -namespace { - -typedef std::map<std::string, Timer> Name2Timer; -typedef std::map<std::string, std::pair<TimerGroup, Name2Timer> > Name2Pair; - -} - -static ManagedStatic<Name2Timer> NamedTimers; - -static ManagedStatic<Name2Pair> NamedGroupedTimers; +typedef StringMap<Timer> Name2TimerMap; -static Timer &getNamedRegionTimer(const std::string &Name) { - sys::SmartScopedLock<true> L(*TimerLock); - Name2Timer::iterator I = NamedTimers->find(Name); - if (I != NamedTimers->end()) - return I->second; - - return NamedTimers->insert(I, std::make_pair(Name, Timer(Name)))->second; -} - -static Timer &getNamedRegionTimer(const std::string &Name, - const std::string &GroupName) { - sys::SmartScopedLock<true> L(*TimerLock); - - Name2Pair::iterator I = NamedGroupedTimers->find(GroupName); - if (I == NamedGroupedTimers->end()) { - TimerGroup TG(GroupName); - std::pair<TimerGroup, Name2Timer> 
Pair(TG, Name2Timer()); - I = NamedGroupedTimers->insert(I, std::make_pair(GroupName, Pair)); +class Name2PairMap { + StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map; +public: + ~Name2PairMap() { + for (StringMap<std::pair<TimerGroup*, Name2TimerMap> >::iterator + I = Map.begin(), E = Map.end(); I != E; ++I) + delete I->second.first; } + + Timer &get(StringRef Name, StringRef GroupName) { + sys::SmartScopedLock<true> L(*TimerLock); + + std::pair<TimerGroup*, Name2TimerMap> &GroupEntry = Map[GroupName]; + + if (!GroupEntry.first) + GroupEntry.first = new TimerGroup(GroupName); + + Timer &T = GroupEntry.second[Name]; + if (!T.isInitialized()) + T.init(Name, *GroupEntry.first); + return T; + } +}; - Name2Timer::iterator J = I->second.second.find(Name); - if (J == I->second.second.end()) - J = I->second.second.insert(J, - std::make_pair(Name, - Timer(Name, - I->second.first))); +static ManagedStatic<Name2TimerMap> NamedTimers; +static ManagedStatic<Name2PairMap> NamedGroupedTimers; - return J->second; +static Timer &getNamedRegionTimer(StringRef Name) { + sys::SmartScopedLock<true> L(*TimerLock); + + Timer &T = (*NamedTimers)[Name]; + if (!T.isInitialized()) + T.init(Name); + return T; } -NamedRegionTimer::NamedRegionTimer(const std::string &Name) +NamedRegionTimer::NamedRegionTimer(StringRef Name) : TimeRegion(getNamedRegionTimer(Name)) {} -NamedRegionTimer::NamedRegionTimer(const std::string &Name, - const std::string &GroupName) - : TimeRegion(getNamedRegionTimer(Name, GroupName)) {} +NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName) + : TimeRegion(NamedGroupedTimers->get(Name, GroupName)) {} //===----------------------------------------------------------------------===// // TimerGroup Implementation //===----------------------------------------------------------------------===// +/// TimerGroupList - This is the global list of TimerGroups, maintained by the +/// TimerGroup ctor/dtor and is protected by the TimerLock lock. 
+static TimerGroup *TimerGroupList = 0; -static void printVal(double Val, double Total, raw_ostream &OS) { - if (Total < 1e-7) // Avoid dividing by zero... - OS << " ----- "; - else { - OS << " " << format("%7.4f", Val) << " ("; - OS << format("%5.1f", Val*100/Total) << "%)"; - } +TimerGroup::TimerGroup(StringRef name) + : Name(name.begin(), name.end()), FirstTimer(0) { + + // Add the group to TimerGroupList. + sys::SmartScopedLock<true> L(*TimerLock); + if (TimerGroupList) + TimerGroupList->Prev = &Next; + Next = TimerGroupList; + Prev = &TimerGroupList; + TimerGroupList = this; } -void Timer::print(const Timer &Total, raw_ostream &OS) { +TimerGroup::~TimerGroup() { + // If the timer group is destroyed before the timers it owns, accumulate and + // print the timing data. + while (FirstTimer != 0) + removeTimer(*FirstTimer); + + // Remove the group from the TimerGroupList. sys::SmartScopedLock<true> L(*TimerLock); - if (Total.UserTime) - printVal(UserTime, Total.UserTime, OS); - if (Total.SystemTime) - printVal(SystemTime, Total.SystemTime, OS); - if (Total.getProcessTime()) - printVal(getProcessTime(), Total.getProcessTime(), OS); - printVal(Elapsed, Total.Elapsed, OS); - - OS << " "; - - if (Total.MemUsed) { - OS << format("%9lld", (long long)MemUsed) << " "; - } - if (Total.PeakMem) { - if (PeakMem) { - OS << format("%9lld", (long long)PeakMem) << " "; - } else - OS << " "; - } - OS << Name << "\n"; - - Started = false; // Once printed, don't print again + *Prev = Next; + if (Next) + Next->Prev = Prev; } -// GetLibSupportInfoOutputFile - Return a file stream to print our output on... 
-raw_ostream * -llvm::GetLibSupportInfoOutputFile() { - std::string &LibSupportInfoOutputFilename = getLibSupportInfoOutputFilename(); - if (LibSupportInfoOutputFilename.empty()) - return &errs(); - if (LibSupportInfoOutputFilename == "-") - return &outs(); - - std::string Error; - raw_ostream *Result = new raw_fd_ostream(LibSupportInfoOutputFilename.c_str(), - Error, raw_fd_ostream::F_Append); - if (Error.empty()) - return Result; - - errs() << "Error opening info-output-file '" - << LibSupportInfoOutputFilename << " for appending!\n"; - delete Result; - return &errs(); +void TimerGroup::removeTimer(Timer &T) { + sys::SmartScopedLock<true> L(*TimerLock); + + // If the timer was started, move its data to TimersToPrint. + if (T.Started) + TimersToPrint.push_back(std::make_pair(T.Time, T.Name)); + + T.TG = 0; + + // Unlink the timer from our list. + *T.Prev = T.Next; + if (T.Next) + T.Next->Prev = T.Prev; + + // Print the report when all timers in this group are destroyed if some of + // them were started. + if (FirstTimer != 0 || TimersToPrint.empty()) + return; + + raw_ostream *OutStream = CreateInfoOutputFile(); + PrintQueuedTimers(*OutStream); + delete OutStream; // Close the file. } - -void TimerGroup::removeTimer() { +void TimerGroup::addTimer(Timer &T) { sys::SmartScopedLock<true> L(*TimerLock); - if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report... - // Sort the timers in descending order by amount of time taken... - std::sort(TimersToPrint.begin(), TimersToPrint.end(), - std::greater<Timer>()); - - // Figure out how many spaces to indent TimerGroup name... - unsigned Padding = (80-Name.length())/2; - if (Padding > 80) Padding = 0; // Don't allow "negative" numbers - - raw_ostream *OutStream = GetLibSupportInfoOutputFile(); - - ++NumTimers; - { // Scope to contain Total timer... don't allow total timer to drop us to - // zero timers... 
- Timer Total("TOTAL"); - - for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) - Total.sum(TimersToPrint[i]); - - // Print out timing header... - *OutStream << "===" << std::string(73, '-') << "===\n" - << std::string(Padding, ' ') << Name << "\n" - << "===" << std::string(73, '-') - << "===\n"; - - // If this is not an collection of ungrouped times, print the total time. - // Ungrouped timers don't really make sense to add up. We still print the - // TOTAL line to make the percentages make sense. - if (this != DefaultTimerGroup) { - *OutStream << " Total Execution Time: "; - - *OutStream << format("%5.4f", Total.getProcessTime()) << " seconds ("; - *OutStream << format("%5.4f", Total.getWallTime()) << " wall clock)\n"; - } - *OutStream << "\n"; - - if (Total.UserTime) - *OutStream << " ---User Time---"; - if (Total.SystemTime) - *OutStream << " --System Time--"; - if (Total.getProcessTime()) - *OutStream << " --User+System--"; - *OutStream << " ---Wall Time---"; - if (Total.getMemUsed()) - *OutStream << " ---Mem---"; - if (Total.getPeakMem()) - *OutStream << " -PeakMem-"; - *OutStream << " --- Name ---\n"; - - // Loop through all of the timing data, printing it out... - for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) - TimersToPrint[i].print(Total, *OutStream); - - Total.print(Total, *OutStream); - *OutStream << '\n'; - OutStream->flush(); - } - --NumTimers; - - TimersToPrint.clear(); - - if (OutStream != &errs() && OutStream != &outs() && OutStream != &dbgs()) - delete OutStream; // Close the file... + + // Add the timer to our list. + if (FirstTimer) + FirstTimer->Prev = &T.Next; + T.Next = FirstTimer; + T.Prev = &FirstTimer; + FirstTimer = &T; +} + +void TimerGroup::PrintQueuedTimers(raw_ostream &OS) { + // Sort the timers in descending order by amount of time taken. 
+ std::sort(TimersToPrint.begin(), TimersToPrint.end()); + + TimeRecord Total; + for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) + Total += TimersToPrint[i].first; + + // Print out timing header. + OS << "===" << std::string(73, '-') << "===\n"; + // Figure out how many spaces to indent TimerGroup name. + unsigned Padding = (80-Name.length())/2; + if (Padding > 80) Padding = 0; // Don't allow "negative" numbers + OS.indent(Padding) << Name << '\n'; + OS << "===" << std::string(73, '-') << "===\n"; + + // If this is not an collection of ungrouped times, print the total time. + // Ungrouped timers don't really make sense to add up. We still print the + // TOTAL line to make the percentages make sense. + if (this != DefaultTimerGroup) { + OS << " Total Execution Time: "; + OS << format("%5.4f", Total.getProcessTime()) << " seconds ("; + OS << format("%5.4f", Total.getWallTime()) << " wall clock)\n"; + } + OS << '\n'; + + if (Total.getUserTime()) + OS << " ---User Time---"; + if (Total.getSystemTime()) + OS << " --System Time--"; + if (Total.getProcessTime()) + OS << " --User+System--"; + OS << " ---Wall Time---"; + if (Total.getMemUsed()) + OS << " ---Mem---"; + OS << " --- Name ---\n"; + + // Loop through all of the timing data, printing it out. + for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) { + const std::pair<TimeRecord, std::string> &Entry = TimersToPrint[e-i-1]; + Entry.first.print(Total, OS); + OS << Entry.second << '\n'; } + + Total.print(Total, OS); + OS << "Total\n\n"; + OS.flush(); + + TimersToPrint.clear(); } -void TimerGroup::addTimer() { +/// print - Print any started timers in this group and zero them. +void TimerGroup::print(raw_ostream &OS) { sys::SmartScopedLock<true> L(*TimerLock); - ++NumTimers; + + // See if any of our timers were started, if so add them to TimersToPrint and + // reset them. 
+ for (Timer *T = FirstTimer; T; T = T->Next) { + if (!T->Started) continue; + TimersToPrint.push_back(std::make_pair(T->Time, T->Name)); + + // Clear out the time. + T->Started = 0; + T->Time = TimeRecord(); + } + + // If any timers were started, print the group. + if (!TimersToPrint.empty()) + PrintQueuedTimers(OS); } -void TimerGroup::addTimerToPrint(const Timer &T) { +/// printAll - This static method prints all timers and clears them all out. +void TimerGroup::printAll(raw_ostream &OS) { sys::SmartScopedLock<true> L(*TimerLock); - TimersToPrint.push_back(Timer(true, T)); -} + for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next) + TG->print(OS); +} diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 61bf0a7..9796ca5 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -189,7 +189,7 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) { return Triple::UnknownArch; } -// Returns architecture name that is unsderstood by the target assembler. +// Returns architecture name that is understood by the target assembler. 
const char *Triple::getArchNameForAssembler() { if (getOS() != Triple::Darwin && getVendor() != Triple::Apple) return NULL; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 071c924..f59bd0d 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -81,9 +81,9 @@ void raw_ostream::SetBuffered() { SetUnbuffered(); } -void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size, +void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size, BufferKind Mode) { - assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) || + assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) || (Mode != Unbuffered && BufferStart && Size)) && "stream must be unbuffered or have at least one byte"); // Make sure the current buffer is free of content (we can't flush here; the @@ -104,11 +104,11 @@ raw_ostream &raw_ostream::operator<<(unsigned long N) { // Zero is a special case. if (N == 0) return *this << '0'; - + char NumberBuffer[20]; char *EndPtr = NumberBuffer+sizeof(NumberBuffer); char *CurPtr = EndPtr; - + while (N) { *--CurPtr = '0' + char(N % 10); N /= 10; @@ -121,7 +121,7 @@ raw_ostream &raw_ostream::operator<<(long N) { *this << '-'; N = -N; } - + return this->operator<<(static_cast<unsigned long>(N)); } @@ -133,7 +133,7 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) { char NumberBuffer[20]; char *EndPtr = NumberBuffer+sizeof(NumberBuffer); char *CurPtr = EndPtr; - + while (N) { *--CurPtr = '0' + char(N % 10); N /= 10; @@ -146,7 +146,7 @@ raw_ostream &raw_ostream::operator<<(long long N) { *this << '-'; N = -N; } - + return this->operator<<(static_cast<unsigned long long>(N)); } @@ -297,33 +297,33 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { size_t BufferBytesLeft = OutBufEnd - OutBufCur; if (BufferBytesLeft > 3) { size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft); - + // Common case is that we have plenty of space. 
if (BytesUsed <= BufferBytesLeft) { OutBufCur += BytesUsed; return *this; } - + // Otherwise, we overflowed and the return value tells us the size to try // again with. NextBufferSize = BytesUsed; } - + // If we got here, we didn't have enough space in the output buffer for the // string. Try printing into a SmallVector that is resized to have enough // space. Iterate until we win. SmallVector<char, 128> V; - + while (1) { V.resize(NextBufferSize); - + // Try formatting into the SmallVector. size_t BytesUsed = Fmt.print(V.data(), NextBufferSize); - + // If BytesUsed fit into the vector, we win. if (BytesUsed <= NextBufferSize) return write(V.data(), BytesUsed); - + // Otherwise, try again with a new size. assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?"); NextBufferSize = BytesUsed; @@ -339,7 +339,7 @@ raw_ostream &raw_ostream::indent(unsigned NumSpaces) { // Usually the indentation is small, handle it with a fastpath. if (NumSpaces < array_lengthof(Spaces)) return write(Spaces, NumSpaces); - + while (NumSpaces) { unsigned NumToWrite = std::min(NumSpaces, (unsigned)array_lengthof(Spaces)-1); @@ -372,7 +372,7 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, // Verify that we don't have both "append" and "excl". assert((!(Flags & F_Excl) || !(Flags & F_Append)) && "Cannot specify both 'excl' and 'append' file creation flags!"); - + ErrorInfo.clear(); // Handle "-" as stdout. 
@@ -385,20 +385,20 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, ShouldClose = false; return; } - + int OpenFlags = O_WRONLY|O_CREAT; #ifdef O_BINARY if (Flags & F_Binary) OpenFlags |= O_BINARY; #endif - + if (Flags & F_Append) OpenFlags |= O_APPEND; else OpenFlags |= O_TRUNC; if (Flags & F_Excl) OpenFlags |= O_EXCL; - + FD = open(Filename, OpenFlags, 0664); if (FD < 0) { ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; @@ -418,14 +418,14 @@ raw_fd_ostream::~raw_fd_ostream() { void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { - assert (FD >= 0 && "File already closed."); + assert(FD >= 0 && "File already closed."); pos += Size; if (::write(FD, Ptr, Size) != (ssize_t) Size) error_detected(); } void raw_fd_ostream::close() { - assert (ShouldClose); + assert(ShouldClose); ShouldClose = false; flush(); if (::close(FD) != 0) @@ -438,7 +438,7 @@ uint64_t raw_fd_ostream::seek(uint64_t off) { pos = ::lseek(FD, off, SEEK_SET); if (pos != off) error_detected(); - return pos; + return pos; } size_t raw_fd_ostream::preferred_buffer_size() const { @@ -447,7 +447,7 @@ size_t raw_fd_ostream::preferred_buffer_size() const { struct stat statbuf; if (fstat(FD, &statbuf) != 0) return 0; - + // If this is a terminal, don't use buffering. Line buffering // would be a more traditional thing to do, but it's not worth // the complexity. 
diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc index 10e7ecb..4a5e28d 100644 --- a/lib/System/Unix/Mutex.inc +++ b/lib/System/Unix/Mutex.inc @@ -29,12 +29,6 @@ MutexImpl::~MutexImpl() } bool -MutexImpl::MutexImpl() -{ - return true; -} - -bool MutexImpl::release() { return true; diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index a99720c..52253b3 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -454,7 +454,7 @@ Path::canWrite() const { bool Path::isRegularFile() const { - // Get the status so we can determine if its a file or directory + // Get the status so we can determine if it's a file or directory struct stat buf; if (0 != stat(path.c_str(), &buf)) @@ -736,7 +736,7 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { bool Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { - // Get the status so we can determine if its a file or directory + // Get the status so we can determine if it's a file or directory. struct stat buf; if (0 != stat(path.c_str(), &buf)) { MakeErrMsg(ErrStr, path + ": can't get status of file"); diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc index a3b40d0..16bb28e 100644 --- a/lib/System/Win32/Program.inc +++ b/lib/System/Win32/Program.inc @@ -138,6 +138,24 @@ static bool ArgNeedsQuotes(const char *Str) { return Str[0] == '\0' || strchr(Str, ' ') != 0; } + +/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling +/// CreateProcess and returns length of quoted arg with escaped quotes +static unsigned int ArgLenWithQuotes(const char *Str) { + unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0; + + while (*Str != '\0') { + if (*Str == '\"') + ++len; + + ++len; + ++Str; + } + + return len; +} + + bool Program::Execute(const Path& path, const char** args, @@ -165,9 +183,7 @@ Program::Execute(const Path& path, // First, determine the length of the command line. 
unsigned len = 0; for (unsigned i = 0; args[i]; i++) { - len += strlen(args[i]) + 1; - if (ArgNeedsQuotes(args[i])) - len += 2; + len += ArgLenWithQuotes(args[i]) + 1; } // Now build the command line. @@ -176,12 +192,18 @@ Program::Execute(const Path& path, for (unsigned i = 0; args[i]; i++) { const char *arg = args[i]; - size_t len = strlen(arg); + bool needsQuoting = ArgNeedsQuotes(arg); if (needsQuoting) *p++ = '"'; - memcpy(p, arg, len); - p += len; + + while (*arg != '\0') { + if (*arg == '\"') + *p++ = '\\'; + + *p++ = *arg++; + } + if (needsQuoting) *p++ = '"'; *p++ = ' '; diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc index dba2218..f2b72ca 100644 --- a/lib/System/Win32/Signals.inc +++ b/lib/System/Win32/Signals.inc @@ -163,6 +163,7 @@ void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >(); CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie)); RegisterHandler(); + LeaveCriticalSection(&CriticalSection); } } diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index bbb1dbd..6486a60 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -43,6 +43,21 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; +// Some processors have multiply-accumulate instructions that don't +// play nicely with other VFP instructions, and it's generally better +// to just not use them. +// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for +// others as well. We should do more benchmarking and confirm one way or +// the other. +def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", + "Disable VFP MAC instructions">; +// Some processors benefit from using NEON instructions for scalar +// single-precision FP operations. 
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; + + //===----------------------------------------------------------------------===// // ARM Processors supported. // @@ -92,7 +107,8 @@ def : ProcNoItin<"iwmmxt", [ArchV5TE]>; // V6 Processors. def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>; -def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2, + FeatureHasSlowVMLx]>; def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>; def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>; @@ -106,7 +122,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON]>; + [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, + FeatureNEONForFP]>; def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index e6ea03a..0a0b0ea 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -204,7 +204,15 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. 
@@ -275,6 +283,11 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } if (!isUncondBranchOpcode(I->getOpcode()) && !isCondBranchOpcode(I->getOpcode())) return 0; @@ -738,14 +751,16 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) .addFrameIndex(FI).addImm(128) .addMemOperand(MMO) .addReg(SrcReg, getKillRegState(isKill))); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRQ)). + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)). addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addMemOperand(MMO)); } } } @@ -788,12 +803,14 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, RC == ARM::QPR_8RegisterClass) && "Unknown regclass!"); if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg) .addFrameIndex(FI).addImm(128) .addMemOperand(MMO)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addMemOperand(MMO)); } } } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 71207c8..7d48663 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp 
@@ -124,17 +124,17 @@ private: /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. SDNode *SelectDYN_ALLOC(SDNode *N); - /// SelectVLD - Select NEON load intrinsics. NumVecs should - /// be 2, 3 or 4. The opcode arrays specify the instructions used for + /// SelectVLD - Select NEON load intrinsics. NumVecs should be + /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. - /// For NumVecs == 2, QOpcodes1 is not used. + /// For NumVecs <= 2, QOpcodes1 is not used. SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectVST - Select NEON store intrinsics. NumVecs should - /// be 2, 3 or 4. The opcode arrays specify the instructions used for + /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// stores of D registers and even subregs and odd subregs of Q registers. - /// For NumVecs == 2, QOpcodes1 is not used. + /// For NumVecs <= 2, QOpcodes1 is not used. 
SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); @@ -1022,7 +1022,7 @@ static EVT GetNEONSubregVT(EVT VT) { SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { - assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); + assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; @@ -1047,6 +1047,9 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; + case MVT::v2i64: OpcodeIndex = 3; + assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); + break; } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); @@ -1060,15 +1063,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, } EVT RegVT = GetNEONSubregVT(VT); - if (NumVecs == 2) { - // Quad registers are directly supported for VLD2, - // loading 2 pairs of D regs. + if (NumVecs <= 2) { + // Quad registers are directly supported for VLD1 and VLD2, + // loading pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - std::vector<EVT> ResTys(4, VT); + std::vector<EVT> ResTys(2 * NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - Chain = SDValue(VLd, 4); + Chain = SDValue(VLd, 2 * NumVecs); // Combine the even and odd subregs to produce the result. 
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { @@ -1109,7 +1112,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { - assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); + assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; @@ -1134,6 +1137,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; + case MVT::v2i64: OpcodeIndex = 3; + assert(NumVecs == 1 && "v2i64 type only supported for VST1"); + break; } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); @@ -1154,9 +1160,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, } EVT RegVT = GetNEONSubregVT(VT); - if (NumVecs == 2) { - // Quad registers are directly supported for VST2, - // storing 2 pairs of D regs. + if (NumVecs <= 2) { + // Quad registers are directly supported for VST1 and VST2, + // storing pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, @@ -1167,7 +1173,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), + 5 + 2 * NumVecs); } // Otherwise, quad registers are stored with two separate instructions, @@ -1694,6 +1701,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ResNode = SelectARMIndexedLoad(N); if (ResNode) return ResNode; + + // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value. 
+ if (Subtarget->hasVFP2() && + N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) { + SDValue Chain = N->getOperand(0); + SDValue AM5Opc = + CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; + return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other, + Ops, 5); + } + // Other cases are autogenerated. + break; + } + case ISD::STORE: { + // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value. + if (Subtarget->hasVFP2() && + N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) { + SDValue Chain = N->getOperand(0); + SDValue AM5Opc = + CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(1), N->getOperand(2), + AM5Opc, Pred, PredReg, Chain }; + return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); + } // Other cases are autogenerated. 
break; } @@ -1831,16 +1867,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { default: break; + case Intrinsic::arm_neon_vld1: { + unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, + ARM::VLD1d32, ARM::VLD1d64 }; + unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, + ARM::VLD1q32, ARM::VLD1q64 }; + return SelectVLD(N, 1, DOpcodes, QOpcodes, 0); + } + case Intrinsic::arm_neon_vld2: { unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, - ARM::VLD2d32, ARM::VLD2d64 }; + ARM::VLD2d32, ARM::VLD1q64 }; unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 }; return SelectVLD(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld3: { unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, - ARM::VLD3d32, ARM::VLD3d64 }; + ARM::VLD3d32, ARM::VLD1d64T }; unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, ARM::VLD3q16_UPD, ARM::VLD3q32_UPD }; @@ -1852,7 +1896,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld4: { unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, - ARM::VLD4d32, ARM::VLD4d64 }; + ARM::VLD4d32, ARM::VLD1d64Q }; unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, ARM::VLD4q16_UPD, ARM::VLD4q32_UPD }; @@ -1883,16 +1927,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } + case Intrinsic::arm_neon_vst1: { + unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, + ARM::VST1d32, ARM::VST1d64 }; + unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, + ARM::VST1q32, ARM::VST1q64 }; + return SelectVST(N, 1, DOpcodes, QOpcodes, 0); + } + case Intrinsic::arm_neon_vst2: { unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, - ARM::VST2d32, ARM::VST2d64 }; + ARM::VST2d32, ARM::VST1q64 }; unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; return SelectVST(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, - ARM::VST3d32, ARM::VST3d64 }; + ARM::VST3d32, ARM::VST1d64T }; unsigned QOpcodes0[] = { ARM::VST3q8_UPD, 
ARM::VST3q16_UPD, ARM::VST3q32_UPD }; @@ -1904,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst4: { unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, - ARM::VST4d32, ARM::VST4d64 }; + ARM::VST4d32, ARM::VST1d64Q }; unsigned QOpcodes0[] = { ARM::VST4q8_UPD, ARM::VST4q16_UPD, ARM::VST4q32_UPD }; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0d0a004..b6c81f6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -456,6 +456,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Generic (and overly aggressive) if-conversion limits. setIfCvtBlockSizeLimit(10); setIfCvtDupBlockSizeLimit(2); + } else if (Subtarget->hasV7Ops()) { + setIfCvtBlockSizeLimit(3); + setIfCvtDupBlockSizeLimit(1); } else if (Subtarget->hasV6Ops()) { setIfCvtBlockSizeLimit(2); setIfCvtDupBlockSizeLimit(1); diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 4f6f05d..4427e50 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1,10 +1,10 @@ //===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -59,7 +59,18 @@ def NEONDupFrm : Format<28>; def MiscFrm : Format<29>; def ThumbMiscFrm : Format<30>; -def NLdStFrm : Format<31>; +def NLdStFrm : Format<31>; +def N1RegModImmFrm : Format<32>; +def N2RegFrm : Format<33>; +def NVCVTFrm : Format<34>; +def NVDupLnFrm : Format<35>; +def N2RegVShLFrm : Format<36>; +def N2RegVShRFrm : Format<37>; +def N3RegFrm : Format<38>; +def N3RegVShFrm : Format<39>; +def NVExtFrm : Format<40>; +def NVMulSLFrm : Format<41>; +def NVTBLFrm : Format<42>; // Misc flags. @@ -177,13 +188,13 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im, // TSFlagsFields AddrMode AM = am; bits<4> AddrModeBits = AM.Value; - + SizeFlagVal SZ = sz; bits<3> SizeFlag = SZ.Value; IndexMode IM = im; bits<2> IndexModeBits = IM.Value; - + Format F = f; bits<6> Form = F.Value; @@ -195,7 +206,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im, // bit isUnaryDataProc = 0; bit canXformTo16Bit = 0; - + let Constraints = cstr; let Itinerary = itin; } @@ -214,9 +225,9 @@ class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> : InstTemplate<am, sz, im, f, d, cstr, itin>; -class PseudoInst<dag oops, dag iops, InstrItinClass itin, +class PseudoInst<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain, + : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain, "", itin> { let OutOperandList = oops; let InOperandList = iops; @@ -226,7 +237,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin, // Almost all ARM instructions are predicable. 
class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, - IndexMode im, Format f, InstrItinClass itin, + IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { @@ -238,9 +249,9 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, } // A few are not predicable class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, - IndexMode im, Format f, InstrItinClass itin, - string opc, string asm, string cstr, - list<dag> pattern> + IndexMode im, Format f, InstrItinClass itin, + string opc, string asm, string cstr, + list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; @@ -290,9 +301,9 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin, : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, asm, "", pattern>; class AInoP<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> + string opc, string asm, list<dag> pattern> : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern>; + opc, asm, "", pattern>; // Ctrl flow instructions class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, @@ -362,7 +373,7 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24-21} = opcod; let Inst{27-26} = {0,0}; } -class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin, +class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, itin, opc, asm, "", pattern>; @@ -387,7 +398,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24} = 1; // P bit let Inst{27-26} = {0,1}; } -class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, +class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> : 
XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, asm, "", pattern> { @@ -407,7 +418,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24} = 1; // P bit let Inst{27-26} = {0,1}; } -class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, +class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, asm, "", pattern> { @@ -549,7 +560,7 @@ class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin, } // addrmode3 instructions -class AI3<dag oops, dag iops, Format f, InstrItinClass itin, +class AI3<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern>; @@ -853,7 +864,6 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{27-25} = 0b000; } - // addrmode4 instructions class AXI4ld<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string asm, string cstr, list<dag> pattern> @@ -961,20 +971,25 @@ class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>; // Two-address instructions -class TIt<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst", pattern>; +class TIt<dag oops, dag iops, InstrItinClass itin, string asm, + list<dag> pattern> + : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst", + pattern>; // tBL, tBX 32-bit instructions class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3, - dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>, Encoding { + dag oops, dag iops, InstrItinClass itin, string asm, + list<dag> pattern> + : ThumbI<oops, iops, AddrModeNone, 
Size4Bytes, itin, asm, "", pattern>, + Encoding { let Inst{31-27} = opcod1; let Inst{15-14} = opcod2; let Inst{12} = opcod3; } // BR_JT instructions -class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> +class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, + list<dag> pattern> : ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>; // Thumb1 only @@ -1001,7 +1016,7 @@ class T1JTI<dag oops, dag iops, InstrItinClass itin, // Two-address instructions class T1It<dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, + : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, asm, cstr, pattern>; // Thumb1 instruction that can either be predicated or set CPSR. @@ -1024,7 +1039,7 @@ class T1sI<dag oops, dag iops, InstrItinClass itin, class T1sIt<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, - "$lhs = $dst", pattern>; + "$lhs = $dst", pattern>; // Thumb1 instruction that can be predicated. 
class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, @@ -1046,7 +1061,7 @@ class T1pI<dag oops, dag iops, InstrItinClass itin, class T1pIt<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, - "$lhs = $dst", pattern>; + "$lhs = $dst", pattern>; class T1pI1<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -1057,7 +1072,7 @@ class T1pI2<dag oops, dag iops, InstrItinClass itin, class T1pI4<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb1pI<oops, iops, AddrModeT1_4, Size2Bytes, itin, opc, asm, "", pattern>; -class T1pIs<dag oops, dag iops, +class T1pIs<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>; @@ -1146,8 +1161,8 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, } class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, - InstrItinClass itin, - string asm, string cstr, list<dag> pattern> + InstrItinClass itin, + string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; @@ -1161,7 +1176,7 @@ class T2I<dag oops, dag iops, InstrItinClass itin, : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>; class T2Ii12<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "", pattern>; + : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "",pattern>; class T2Ii8<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "", pattern>; @@ -1196,7 +1211,7 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin, : Thumb2XI<oops, 
iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>; class T2Ix2<dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> + string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>; // Two-address instructions @@ -1295,7 +1310,7 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, - VFPLdStFrm, itin, opc, asm, "", pattern> { + VFPLdStFrm, itin, opc, asm, "", pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; @@ -1309,7 +1324,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, - VFPLdStFrm, itin, opc, asm, "", pattern> { + VFPLdStFrm, itin, opc, asm, "", pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; @@ -1320,7 +1335,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : VFPXI<oops, iops, AddrMode5, Size4Bytes, im, - VFPLdStMulFrm, itin, asm, cstr, pattern> { + VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; let Inst{11-8} = 0b1011; @@ -1332,7 +1347,7 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : VFPXI<oops, iops, AddrMode5, Size4Bytes, im, - VFPLdStMulFrm, itin, asm, cstr, pattern> { + VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. 
let Inst{27-25} = 0b110; let Inst{11-8} = 0b1010; @@ -1353,7 +1368,8 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, // Double precision, binary class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, - dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> + dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; @@ -1362,6 +1378,20 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{4} = op4; } +// Double precision, binary, VML[AS] (for additional predicate) +class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1011; + let Inst{6} = op6; + let Inst{4} = op4; + list<Predicate> Predicates = [HasVFP2, UseVMLx]; +} + + // Single precision, unary class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, @@ -1399,7 +1429,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, // Single precision binary, if no NEON // Same as ASbI except not available if NEON is enabled class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, - dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> + dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } @@ -1419,8 +1450,8 @@ class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, // VFP conversion between floating-point and fixed-point class AVConv1XI<bits<5> 
op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, - dag oops, dag iops, InstrItinClass itin, string opc, string asm, - list<dag> pattern> + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { // size (fixed-point number): sx == 0 ? 16 : 32 let Inst{7} = op5; // sx @@ -1448,7 +1479,7 @@ class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>; -class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, +class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>; @@ -1480,9 +1511,10 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, } // Same as NeonI except it does not have a "data type" specifier. 
-class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { +class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, + InstrItinClass itin, string opc, string asm, string cstr, + list<dag> pattern> + : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm)); @@ -1490,18 +1522,6 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, list<Predicate> Predicates = [HasNEON]; } -class NI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, - list<dag> pattern> - : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm, "", - pattern> { -} - -class NI4<dag oops, dag iops, InstrItinClass itin, string opc, - string asm, list<dag> pattern> - : NeonXI<oops, iops, AddrMode4, IndexModeNone, itin, opc, asm, "", - pattern> { -} - class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> @@ -1514,17 +1534,17 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, let Inst{7-4} = op7_4; } -class NDataI<dag oops, dag iops, InstrItinClass itin, +class NDataI<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrModeNone, IndexModeNone, NEONFrm, itin, opc, dt, asm, - cstr, pattern> { + : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr, + pattern> { let Inst{31-25} = 0b1111001; } -class NDataXI<dag oops, dag iops, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm, - cstr, pattern> { 
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm, + cstr, pattern> { let Inst{31-25} = 0b1111001; } @@ -1532,8 +1552,9 @@ class NDataXI<dag oops, dag iops, InstrItinClass itin, class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, bit op5, bit op4, dag oops, dag iops, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> { + string opc, string dt, string asm, string cstr, + list<dag> pattern> + : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> { let Inst{23} = op23; let Inst{21-19} = op21_19; let Inst{11-8} = op11_8; @@ -1548,7 +1569,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> { + : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> { let Inst{24-23} = op24_23; let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; @@ -1560,10 +1581,10 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, // Same as N2V except it doesn't have a datatype suffix. 
class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, - bits<5> op11_7, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> { + bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> { let Inst{24-23} = op24_23; let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; @@ -1575,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, // NEON 2 vector register with immediate. class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, + dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> { + : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { let Inst{24} = op24; let Inst{23} = op23; let Inst{11-8} = op11_8; @@ -1588,9 +1609,9 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, // NEON 3 vector register format. class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, + dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> { + : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { let Inst{24} = op24; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1599,11 +1620,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{4} = op4; } -// Same as N3VX except it doesn't have a data type suffix. 
-class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> { +// Same as N3V except it doesn't have a data type suffix. +class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> { let Inst{24} = op24; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1617,7 +1639,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain, - "", itin> { + "", itin> { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; let Inst{6-5} = opcod3; @@ -1647,6 +1669,19 @@ class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin, opc, dt, asm, pattern>; +// Vector Duplicate Lane (from scalar to all elements) +class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops, + InstrItinClass itin, string opc, string dt, string asm, + list<dag> pattern> + : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> { + let Inst{24-23} = 0b11; + let Inst{21-20} = 0b11; + let Inst{19-16} = op19_16; + let Inst{11-7} = 0b11000; + let Inst{6} = op6; + let Inst{4} = 0; +} + // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON // for single-precision FP. 
class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 26a2806..f2ab06f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -140,6 +140,8 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; +def UseVMLx : Predicate<"Subtarget->useVMLx()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c977cc3..ed9d31d 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -115,51 +115,80 @@ def h64imm : Operand<i64> { // NEON load / store instructions //===----------------------------------------------------------------------===// +let mayLoad = 1 in { // Use vldmia to load a Q register as a D register pair. -def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, - "vldmia", "$addr, ${dst:dregpair}", - [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { - let Inst{27-25} = 0b110; - let Inst{24} = 0; // P bit - let Inst{23} = 1; // U bit - let Inst{20} = 1; - let Inst{11-8} = 0b1011; -} +// This is equivalent to VLDMD except that it has a Q register operand +// instead of a pair of D registers. +def VLDMQ + : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), + IndexModeNone, IIC_fpLoadm, + "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; +def VLDMQ_UPD + : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p), + IndexModeUpd, IIC_fpLoadm, + "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}", + "$addr.base = $wb", []>; + +// Use vld1 to load a Q register as a D register pair. +// This alternative to VLDMQ allows an alignment to be specified. +// This is equivalent to VLD1q64 except that it has a Q register operand. 
+def VLD1q + : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr), + IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; +def VLD1q_UPD + : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64", + "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>; +} // mayLoad = 1 +let mayStore = 1 in { // Use vstmia to store a Q register as a D register pair. -def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, - "vstmia", "$addr, ${src:dregpair}", - [(store (v2f64 QPR:$src), addrmode4:$addr)]> { - let Inst{27-25} = 0b110; - let Inst{24} = 0; // P bit - let Inst{23} = 1; // U bit - let Inst{20} = 0; - let Inst{11-8} = 0b1011; -} +// This is equivalent to VSTMD except that it has a Q register operand +// instead of a pair of D registers. +def VSTMQ + : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), + IndexModeNone, IIC_fpStorem, + "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; +def VSTMQ_UPD + : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p), + IndexModeUpd, IIC_fpStorem, + "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}", + "$addr.base = $wb", []>; + +// Use vst1 to store a Q register as a D register pair. +// This alternative to VSTMQ allows an alignment to be specified. +// This is equivalent to VST1q64 except that it has a Q register operand. 
+def VST1q : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; +def VST1q_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, "vst1", "64", "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>; +} // mayStore = 1 -// VLD1 : Vector Load (multiple single elements) -class VLD1D<bits<4> op7_4, string Dt, ValueType Ty> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - "vld1", Dt, "\\{$dst\\}, $addr", "", - [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; -class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty> - : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - "vld1", Dt, "${dst:dregpair}, $addr", "", - [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; - -def VLD1d8 : VLD1D<0b0000, "8", v8i8>; -def VLD1d16 : VLD1D<0b0100, "16", v4i16>; -def VLD1d32 : VLD1D<0b1000, "32", v2i32>; -def VLD1df : VLD1D<0b1000, "32", v2f32>; -def VLD1d64 : VLD1D<0b1100, "64", v1i64>; - -def VLD1q8 : VLD1Q<0b0000, "8", v16i8>; -def VLD1q16 : VLD1Q<0b0100, "16", v8i16>; -def VLD1q32 : VLD1Q<0b1000, "32", v4i32>; -def VLD1qf : VLD1Q<0b1000, "32", v4f32>; -def VLD1q64 : VLD1Q<0b1100, "64", v2i64>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { -let mayLoad = 1 in { +// VLD1 : Vector Load (multiple single elements) +class VLD1D<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), + (ins addrmode6:$addr), IIC_VLD1, + "vld1", Dt, "\\{$dst\\}, $addr", "", []>; +class VLD1Q<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD1, + "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; + +def VLD1d8 : VLD1D<0b0000, "8">; +def VLD1d16 : VLD1D<0b0100, "16">; +def VLD1d32 : VLD1D<0b1000, "32">; +def VLD1d64 : VLD1D<0b1100, "64">; + +def VLD1q8 : VLD1Q<0b0000, "8">; +def VLD1q16 : 
VLD1Q<0b0100, "16">; +def VLD1q32 : VLD1Q<0b1000, "32">; +def VLD1q64 : VLD1Q<0b1100, "64">; // ...with address register writeback: class VLD1DWB<bits<4> op7_4, string Dt> @@ -182,54 +211,48 @@ def VLD1q8_UPD : VLD1QWB<0b0000, "8">; def VLD1q16_UPD : VLD1QWB<0b0100, "16">; def VLD1q32_UPD : VLD1QWB<0b1000, "32">; def VLD1q64_UPD : VLD1QWB<0b1100, "64">; -} // mayLoad = 1 -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { - -// These (dreg triple/quadruple) are for disassembly only. +// ...with 3 registers (some of these are only for the disassembler): class VLD1D3<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3\\}, $addr", "", - [/* For disassembly only; pattern left blank */]>; -class VLD1D4<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", - [/* For disassembly only; pattern left blank */]>; - -def VLD1d8T : VLD1D3<0b0000, "8">; -def VLD1d16T : VLD1D3<0b0100, "16">; -def VLD1d32T : VLD1D3<0b1000, "32">; -// VLD1d64T : implemented as VLD3d64 - -def VLD1d8Q : VLD1D4<0b0000, "8">; -def VLD1d16Q : VLD1D4<0b0100, "16">; -def VLD1d32Q : VLD1D4<0b1000, "32">; -// VLD1d64Q : implemented as VLD4d64 - -// ...with address register writeback: + "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; class VLD1D3WB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", - [/* For disassembly only; pattern left blank */]>; + "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>; + +def VLD1d8T : VLD1D3<0b0000, "8">; +def VLD1d16T : VLD1D3<0b0100, "16">; +def VLD1d32T : VLD1D3<0b1000, "32">; +def VLD1d64T : VLD1D3<0b1100, "64">; + +def 
VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; +def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; +def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; +def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">; + +// ...with 4 registers (some of these are only for the disassembler): +class VLD1D4<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; class VLD1D4WB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", - [/* For disassembly only; pattern left blank */]>; + []>; -def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; -def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; -def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; -// VLD1d64T_UPD : implemented as VLD3d64_UPD +def VLD1d8Q : VLD1D4<0b0000, "8">; +def VLD1d16Q : VLD1D4<0b0100, "16">; +def VLD1d32Q : VLD1D4<0b1000, "32">; +def VLD1d64Q : VLD1D4<0b1100, "64">; def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">; def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; -// VLD1d64Q_UPD : implemented as VLD4d64_UPD +def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">; // VLD2 : Vector Load (multiple 2-element structures) class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -245,9 +268,6 @@ class VLD2Q<bits<4> op7_4, string Dt> def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; -def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>; def VLD2q8 : VLD2Q<0b0000, "8">; def VLD2q16 : VLD2Q<0b0100, "16">; @@ -269,11 +289,6 @@ class VLD2QWB<bits<4> op7_4, string Dt> def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; def VLD2d16_UPD : VLD2DWB<0b1000, 
0b0100, "16">; def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; -def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100, - (outs DPR:$dst1, DPR:$dst2, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset", - "$addr.addr = $wb", []>; def VLD2q8_UPD : VLD2QWB<0b0000, "8">; def VLD2q16_UPD : VLD2QWB<0b0100, "16">; @@ -296,10 +311,6 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; -def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; // ...with address register writeback: class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -312,11 +323,6 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; -def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset", - "$addr.addr = $wb", []>; // ...with double-spaced registers (non-updating versions for disassembly only): def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; @@ -341,11 +347,6 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; -def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; // ...with address register writeback: class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -358,13 +359,6 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 
def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; -def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, - GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, - "vld1", "64", - "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", - "$addr.addr = $wb", []>; // ...with double-spaced registers (non-updating versions for disassembly only): def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; @@ -383,62 +377,62 @@ def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; // FIXME: Not yet implemented. // VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LN<bits<4> op11_8, string Dt> - : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), +class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2", []>; -def VLD2LNd8 : VLD2LN<0b0001, "8">; -def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; } -def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; } +def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">; +def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">; +def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">; // ...with double-spaced registers: -def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } -def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } +def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">; +def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">; // ...alternate versions to be allocated odd register numbers: -def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } -def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } +def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">; +def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">; // ...with address register writeback: -class 
VLD2LNWB<bits<4> op11_8, string Dt> - : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), +class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset", "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>; -def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">; -def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; } -def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; } +def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8">; +def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">; +def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">; -def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; } -def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; } +def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">; +def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">; // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LN<bits<4> op11_8, string Dt> - : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), +class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3, "vld3", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; -def VLD3LNd8 : VLD3LN<0b0010, "8"> { let Inst{4} = 0; } -def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } -def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } +def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">; +def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">; +def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">; // ...with double-spaced registers: -def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32 
: VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } +def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">; +def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">; // ...alternate versions to be allocated odd register numbers: -def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } +def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">; +def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">; // ...with address register writeback: -class VLD3LNWB<bits<4> op11_8, string Dt> - : NLdSt<1, 0b10, op11_8, {?,?,?,?}, +class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), @@ -447,37 +441,37 @@ class VLD3LNWB<bits<4> op11_8, string Dt> "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb", []>; -def VLD3LNd8_UPD : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; } -def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } -def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } +def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">; +def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">; +def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">; -def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } +def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">; +def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">; // VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LN<bits<4> op11_8, string Dt> - : NLdSt<1, 0b10, op11_8, {?,?,?,?}, +class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VLD4, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], 
$dst3[$lane], $dst4[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; -def VLD4LNd8 : VLD4LN<0b0011, "8">; -def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; } -def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; } +def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">; +def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">; +def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">; // ...with double-spaced registers: -def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } -def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } +def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">; +def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">; // ...alternate versions to be allocated odd register numbers: -def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } -def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } +def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">; +def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">; // ...with address register writeback: -class VLD4LNWB<bits<4> op11_8, string Dt> - : NLdSt<1, 0b10, op11_8, {?,?,?,?}, +class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), @@ -486,12 +480,12 @@ class VLD4LNWB<bits<4> op11_8, string Dt> "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", []>; -def VLD4LNd8_UPD : VLD4LNWB<0b0011, "8">; -def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; } -def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; } +def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">; +def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">; +def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">; -def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; } -def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; } +def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, 
"16">; +def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -500,31 +494,26 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; } // FIXME: Not yet implemented. } // mayLoad = 1, hasExtraDefRegAllocReq = 1 +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { + // VST1 : Vector Store (multiple single elements) -class VST1D<bits<4> op7_4, string Dt, ValueType Ty> +class VST1D<bits<4> op7_4, string Dt> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - "vst1", Dt, "\\{$src\\}, $addr", "", - [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q<bits<4> op7_4, string Dt, ValueType Ty> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, - "vst1", Dt, "${src:dregpair}, $addr", "", - [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>; - -let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<0b0000, "8", v8i8>; -def VST1d16 : VST1D<0b0100, "16", v4i16>; -def VST1d32 : VST1D<0b1000, "32", v2i32>; -def VST1df : VST1D<0b1000, "32", v2f32>; -def VST1d64 : VST1D<0b1100, "64", v1i64>; - -def VST1q8 : VST1Q<0b0000, "8", v16i8>; -def VST1q16 : VST1Q<0b0100, "16", v8i16>; -def VST1q32 : VST1Q<0b1000, "32", v4i32>; -def VST1qf : VST1Q<0b1000, "32", v4f32>; -def VST1q64 : VST1Q<0b1100, "64", v2i64>; -} // hasExtraSrcRegAllocReq - -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { + "vst1", Dt, "\\{$src\\}, $addr", "", []>; +class VST1Q<bits<4> op7_4, string Dt> + : NLdSt<0,0b00,0b1010,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>; + +def VST1d8 : VST1D<0b0000, "8">; +def VST1d16 : VST1D<0b0100, "16">; +def VST1d32 : VST1D<0b1000, "32">; +def VST1d64 : VST1D<0b1100, "64">; + +def VST1q8 : VST1Q<0b0000, "8">; +def VST1q16 : VST1Q<0b0100, "16">; +def VST1q32 : VST1Q<0b1000, "32">; +def VST1q64 : 
VST1Q<0b1100, "64">; // ...with address register writeback: class VST1DWB<bits<4> op7_4, string Dt> @@ -546,53 +535,50 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">; def VST1q32_UPD : VST1QWB<0b1000, "32">; def VST1q64_UPD : VST1QWB<0b1100, "64">; -// These (dreg triple/quadruple) are for disassembly only. +// ...with 3 registers (some of these are only for the disassembler): class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", - [/* For disassembly only; pattern left blank */]>; -class VST1D4<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", - [/* For disassembly only; pattern left blank */]>; - -def VST1d8T : VST1D3<0b0000, "8">; -def VST1d16T : VST1D3<0b0100, "16">; -def VST1d32T : VST1D3<0b1000, "32">; -// VST1d64T : implemented as VST3d64 - -def VST1d8Q : VST1D4<0b0000, "8">; -def VST1d16Q : VST1D4<0b0100, "16">; -def VST1d32Q : VST1D4<0b1000, "32">; -// VST1d64Q : implemented as VST4d64 - -// ...with address register writeback: + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; class VST1D3WB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", - "$addr.addr = $wb", - [/* For disassembly only; pattern left blank */]>; + "$addr.addr = $wb", []>; + +def VST1d8T : VST1D3<0b0000, "8">; +def VST1d16T : VST1D3<0b0100, "16">; +def VST1d32T : VST1D3<0b1000, "32">; +def VST1d64T : VST1D3<0b1100, "64">; + +def VST1d8T_UPD : VST1D3WB<0b0000, "8">; +def VST1d16T_UPD : VST1D3WB<0b0100, "16">; +def VST1d32T_UPD : VST1D3WB<0b1000, "32">; +def VST1d64T_UPD : VST1D3WB<0b1100, "64">; + +// ...with 4 registers 
(some of these are only for the disassembler): +class VST1D4<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + []>; class VST1D4WB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", - "$addr.addr = $wb", - [/* For disassembly only; pattern left blank */]>; + "$addr.addr = $wb", []>; -def VST1d8T_UPD : VST1D3WB<0b0000, "8">; -def VST1d16T_UPD : VST1D3WB<0b0100, "16">; -def VST1d32T_UPD : VST1D3WB<0b1000, "32">; -// VST1d64T_UPD : implemented as VST3d64_UPD +def VST1d8Q : VST1D4<0b0000, "8">; +def VST1d16Q : VST1D4<0b0100, "16">; +def VST1d32Q : VST1D4<0b1000, "32">; +def VST1d64Q : VST1D4<0b1100, "64">; def VST1d8Q_UPD : VST1D4WB<0b0000, "8">; def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; -// VST1d64Q_UPD : implemented as VST4d64_UPD +def VST1d64Q_UPD : VST1D4WB<0b1100, "64">; // VST2 : Vector Store (multiple 2-element structures) class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -608,9 +594,6 @@ class VST2Q<bits<4> op7_4, string Dt> def VST2d8 : VST2D<0b1000, 0b0000, "8">; def VST2d16 : VST2D<0b1000, 0b0100, "16">; def VST2d32 : VST2D<0b1000, 0b1000, "32">; -def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>; def VST2q8 : VST2Q<0b0000, "8">; def VST2q16 : VST2Q<0b0100, "16">; @@ -632,11 +615,6 @@ class VST2QWB<bits<4> op7_4, string Dt> def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; -def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, 
am6offset:$offset, - DPR:$src1, DPR:$src2), IIC_VST, - "vst1", "64", "\\{$src1, $src2\\}, $addr$offset", - "$addr.addr = $wb", []>; def VST2q8_UPD : VST2QWB<0b0000, "8">; def VST2q16_UPD : VST2QWB<0b0100, "16">; @@ -659,10 +637,6 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> def VST3d8 : VST3D<0b0100, 0b0000, "8">; def VST3d16 : VST3D<0b0100, 0b0100, "16">; def VST3d32 : VST3D<0b0100, 0b1000, "32">; -def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VST, - "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>; // ...with address register writeback: class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -675,11 +649,6 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; -def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset", - "$addr.addr = $wb", []>; // ...with double-spaced registers (non-updating versions for disassembly only): def VST3q8 : VST3D<0b0101, 0b0000, "8">; @@ -704,11 +673,6 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> def VST4d8 : VST4D<0b0000, 0b0000, "8">; def VST4d16 : VST4D<0b0000, 0b0100, "16">; def VST4d32 : VST4D<0b0000, 0b1000, "32">; -def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - DPR:$src4), IIC_VST, - "vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr", - "", []>; // ...with address register writeback: class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -721,12 +685,6 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; -def 
VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - "vst1", "64", - "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", - "$addr.addr = $wb", []>; // ...with double-spaced registers (non-updating versions for disassembly only): def VST4q8 : VST4D<0b0001, 0b0000, "8">; @@ -745,109 +703,109 @@ def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">; // FIXME: Not yet implemented. // VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LN<bits<4> op11_8, string Dt> - : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), +class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", "", []>; -def VST2LNd8 : VST2LN<0b0001, "8">; -def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; } -def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; } +def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">; +def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">; +def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">; // ...with double-spaced registers: -def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; } -def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; } +def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">; +def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">; // ...alternate versions to be allocated odd register numbers: -def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; } -def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; } +def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">; +def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">; // ...with address register writeback: -class VST2LNWB<bits<4> op11_8, string Dt> - : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), +class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, 
am6offset:$offset, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; -def VST2LNd8_UPD : VST2LNWB<0b0001, "8">; -def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; } -def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; } +def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">; +def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">; +def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">; -def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; } -def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; } +def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">; +def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">; // VST3LN : Vector Store (single 3-element structure from one lane) -class VST3LN<bits<4> op11_8, string Dt> - : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), +class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST, "vst3", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; -def VST3LNd8 : VST3LN<0b0010, "8"> { let Inst{4} = 0; } -def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } -def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } +def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">; +def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">; +def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">; // ...with double-spaced registers: -def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } +def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">; +def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">; // ...alternate versions to be allocated odd register numbers: -def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } +def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, 
"16">; +def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">; // ...with address register writeback: -class VST3LNWB<bits<4> op11_8, string Dt> - : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), +class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST, "vst3", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; -def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; } -def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } -def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } +def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">; +def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">; +def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">; -def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } +def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">; +def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">; // VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LN<bits<4> op11_8, string Dt> - : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), +class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST, "vst4", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", "", []>; -def VST4LNd8 : VST4LN<0b0011, "8">; -def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; } -def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; } +def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">; +def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">; +def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">; // ...with double-spaced registers: -def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; } -def VST4LNq32 : VST4LN<0b1011, 
"32"> { let Inst{6} = 1; } +def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">; +def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">; // ...alternate versions to be allocated odd register numbers: -def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; } -def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; } +def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">; +def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">; // ...with address register writeback: -class VST4LNWB<bits<4> op11_8, string Dt> - : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), +class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST, "vst4", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", "$addr.addr = $wb", []>; -def VST4LNd8_UPD : VST4LNWB<0b0011, "8">; -def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; } -def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; } +def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">; +def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">; +def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">; -def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; } -def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; } +def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">; +def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; } // mayStore = 1, hasExtraSrcRegAllocReq = 1 @@ -906,18 +864,18 @@ class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), - (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", + (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "", [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>; class N2VQ<bits<2> 
op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), - (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "", + (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "", [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>; // Basic 2-register intrinsics, both double- and quad-register. class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, + bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), @@ -966,8 +924,8 @@ class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, - OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, + IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { let isCommutable = Commutable; } @@ -975,7 +933,7 @@ class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { let isCommutable = Commutable; @@ -986,27 +944,28 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : 
N3VX<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, OpcodeStr, "$dst, $src1, $src2", "", [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{ let isCommutable = Commutable; } + class N3VDSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{ + (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> { let isCommutable = 0; } class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { @@ -1017,7 +976,7 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { let isCommutable = Commutable; @@ -1026,7 +985,7 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : 
N3VX<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin, OpcodeStr, "$dst, $src1, $src2", "", [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{ let isCommutable = Commutable; @@ -1036,7 +995,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -1047,7 +1006,7 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -1057,10 +1016,10 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, // Basic 3-register intrinsics, both double- and quad-register. 
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { let isCommutable = Commutable; @@ -1069,7 +1028,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -1080,19 +1039,18 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_8:$src2), - imm:$lane)))))]> { + (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { let isCommutable = 0; } class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin, 
OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { let isCommutable = Commutable; @@ -1102,7 +1060,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -1114,7 +1072,7 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -1128,14 +1086,14 @@ class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, + (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (Ty (OpNode DPR:$src1, (Ty (MulOp DPR:$src2, DPR:$src3)))))]>; @@ -1144,7 +1102,8 @@ class 
N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, + (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), @@ -1156,7 +1115,8 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, + (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), @@ -1168,7 +1128,7 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (Ty (OpNode QPR:$src1, (Ty (MulOp QPR:$src2, QPR:$src3)))))]>; @@ -1177,7 +1137,8 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, + (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), @@ -1190,7 +1151,8 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins 
QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, + (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), @@ -1204,7 +1166,7 @@ class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2), (OpTy DPR:$src3))))]>; @@ -1212,7 +1174,7 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2), (OpTy QPR:$src3))))]>; @@ -1223,7 +1185,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), itin, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>; @@ -1232,7 +1194,8 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 
1, 0, (outs QPR:$dst), - (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, + (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), @@ -1244,7 +1207,8 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, + (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), @@ -1257,7 +1221,7 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINi4D, + (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D, OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> { let isCommutable = Commutable; @@ -1268,7 +1232,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), itin, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> { let isCommutable = Commutable; @@ -1277,8 +1241,8 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), 
(ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (OpTy DPR:$src1), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -1288,7 +1252,7 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (OpTy DPR:$src1), (OpTy (NEONvduplane (OpTy DPR_8:$src2), @@ -1299,7 +1263,7 @@ class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), IIC_VSUBiD, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt, "$dst, $src1, $src2", "", [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> { let isCommutable = Commutable; @@ -1344,17 +1308,17 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. 
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin, + (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin, OpcodeStr, Dt, "$dst, $src, $SIMM", "", [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>; class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin, + (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin, OpcodeStr, Dt, "$dst, $src, $SIMM", "", [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>; @@ -1363,8 +1327,8 @@ class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD, - OpcodeStr, Dt, "$dst, $src, $SIMM", "", + (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm, + IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "", [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>; @@ -1373,7 +1337,7 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin, + (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin, OpcodeStr, Dt, "$dst, $src, $SIMM", "", [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src), (i32 imm:$SIMM))))]>; @@ -1383,14 +1347,14 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit 
op7, bit op6, bit op4, class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VPALiD, + (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst", [(set DPR:$dst, (Ty (add DPR:$src1, (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>; class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VPALiD, + (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst", [(set QPR:$dst, (Ty (add QPR:$src1, (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>; @@ -1398,15 +1362,15 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Shift by immediate and insert, // both double- and quad-register. 
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VSHLiD, + (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst", [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>; class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VSHLiQ, + (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst", [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>; @@ -1416,15 +1380,15 @@ class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm<op24, op23, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD, - OpcodeStr, Dt, "$dst, $src, $SIMM", "", + (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm, + IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>; class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm<op24, op23, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ, - OpcodeStr, Dt, "$dst, $src, $SIMM", "", + (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm, + IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>; 
//===----------------------------------------------------------------------===// @@ -1568,24 +1532,24 @@ multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, // Neon 3-register vector intrinsics. // First with only element sizes of 16 and 32 bits: -multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, +multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { // 64-bit vector types. - def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16, + def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp, Commutable>; - def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32, + def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp, Commutable>; // 128-bit vector types. 
- def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16, + def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp, Commutable>; - def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32, + def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp, Commutable>; } @@ -1605,38 +1569,37 @@ multiclass N3VIntSL_HS<bits<4> op11_8, } // ....then also with element size of 8 bits: -multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, +multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, + : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, OpcodeStr, Dt, IntOp, Commutable> { - def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16, + def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp, Commutable>; - def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16, + def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp, Commutable>; } // ....then also with element size of 64 bits: -multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, +multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, + : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, OpcodeStr, Dt, IntOp, Commutable> { - def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32, + def 
v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32, OpcodeStr, !strconcat(Dt, "64"), v1i64, v1i64, IntOp, Commutable>; - def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32, + def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32, OpcodeStr, !strconcat(Dt, "64"), v2i64, v2i64, IntOp, Commutable>; } - // Neon Narrowing 3-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, @@ -1866,46 +1829,46 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, // Neon 2-register vector shift by immediate, +// with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode, Format f> { // 64-bit vector types. - def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin, + def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin, OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; // imm6 = xxxxxx // 128-bit vector types. 
- def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin, + def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin, OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx } - // Neon Shift-Accumulate vector operations, // element sizes of 8, 16, 32 and 64 bits: multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, @@ -1947,41 +1910,43 @@ multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, // Neon Shift-Insert vector operations, +// with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, SDNode ShOp> { + string OpcodeStr, SDNode ShOp, + Format f> { // 64-bit vector types. 
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, - OpcodeStr, "8", v8i8, ShOp> { + f, OpcodeStr, "8", v8i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, - OpcodeStr, "16", v4i16, ShOp> { + f, OpcodeStr, "16", v4i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, - OpcodeStr, "32", v2i32, ShOp> { + f, OpcodeStr, "32", v2i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, - OpcodeStr, "64", v1i64, ShOp>; + f, OpcodeStr, "64", v1i64, ShOp>; // imm6 = xxxxxx // 128-bit vector types. def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, - OpcodeStr, "8", v16i8, ShOp> { + f, OpcodeStr, "8", v16i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, - OpcodeStr, "16", v8i16, ShOp> { + f, OpcodeStr, "16", v8i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, - OpcodeStr, "32", v4i32, ShOp> { + f, OpcodeStr, "32", v4i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, - OpcodeStr, "64", v2i64, ShOp>; + f, OpcodeStr, "64", v2i64, ShOp>; // imm6 = xxxxxx } @@ -2044,20 +2009,26 @@ defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; // VHADD : Vector Halving Add -defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>; -defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>; +defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vhadd", "s", int_arm_neon_vhadds, 1>; +defm VHADDu : N3VInt_QHS<1, 0, 
0b0000, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vhadd", "u", int_arm_neon_vhaddu, 1>; // VRHADD : Vector Rounding Halving Add -defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>; -defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>; +defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vrhadd", "s", int_arm_neon_vrhadds, 1>; +defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vrhadd", "u", int_arm_neon_vrhaddu, 1>; // VQADD : Vector Saturating Add -defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>; -defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; +defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vqadd", "s", int_arm_neon_vqadds, 1>; +defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", int_arm_neon_vaddhn, 1>; @@ -2070,10 +2041,10 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", // VMUL : Vector Multiply (integer, polynomial and floating-point) defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; -def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8", - v8i8, v8i8, int_arm_neon_vmulp, 1>; -def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8", - v16i8, v16i8, int_arm_neon_vmulp, 1>; +def VMULpd : N3VDInt<1, 0, 0b00, 
0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", + "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", + "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32", @@ -2103,7 +2074,7 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VQDMULH : Vector Saturating Doubling Multiply Returning High Half -defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, +defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, @@ -2125,8 +2096,8 @@ def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half -defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, - IIC_VMULi16Q, IIC_VMULi32Q, +defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, + IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, @@ -2285,18 +2256,18 @@ defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u", defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract -defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vhsub", "s", int_arm_neon_vhsubs, 0>; -defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VHSUBu : 
N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vhsub", "u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract -defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vqsub", "s", int_arm_neon_vqsubs, 0>; -defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", @@ -2323,8 +2294,8 @@ defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", - v2i32, v2f32, NEONvcge, 0>; +def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, + NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; // For disassembly only. 
@@ -2351,21 +2322,27 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", "$dst, $src, #0">; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) -def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32", - v2i32, v2f32, int_arm_neon_vacged, 0>; -def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32", - v4i32, v4f32, int_arm_neon_vacgeq, 0>; +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", + "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", + "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) -def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32", - v2i32, v2f32, int_arm_neon_vacgtd, 0>; -def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32", - v4i32, v4f32, int_arm_neon_vacgtq, 0>; +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", + "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", + "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; // VTST : Vector Test Bits defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; // Vector Bitwise Operations. 
+def vnot8 : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>; +def vnot16 : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>; + + // VAND : Vector Bitwise AND def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", v2i32, v2i32, and, 1>; @@ -2386,74 +2363,80 @@ def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", // VBIC : Vector Bitwise Bit Clear (AND NOT) def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2), IIC_VBINiD, - "vbic", "$dst, $src1, $src2", "", - [(set DPR:$dst, (v2i32 (and DPR:$src1, - (vnot_conv DPR:$src2))))]>; + (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, + "vbic", "$dst, $src1, $src2", "", + [(set DPR:$dst, (v2i32 (and DPR:$src1, + (vnot8 DPR:$src2))))]>; def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, - "vbic", "$dst, $src1, $src2", "", - [(set QPR:$dst, (v4i32 (and QPR:$src1, - (vnot_conv QPR:$src2))))]>; + (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, + "vbic", "$dst, $src1, $src2", "", + [(set QPR:$dst, (v4i32 (and QPR:$src1, + (vnot16 QPR:$src2))))]>; // VORN : Vector Bitwise OR NOT def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2), IIC_VBINiD, - "vorn", "$dst, $src1, $src2", "", - [(set DPR:$dst, (v2i32 (or DPR:$src1, - (vnot_conv DPR:$src2))))]>; + (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, + "vorn", "$dst, $src1, $src2", "", + [(set DPR:$dst, (v2i32 (or DPR:$src1, + (vnot8 DPR:$src2))))]>; def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, - "vorn", "$dst, $src1, $src2", "", - [(set QPR:$dst, (v4i32 (or QPR:$src1, - (vnot_conv QPR:$src2))))]>; + (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, + "vorn", "$dst, $src1, $src2", "", + [(set QPR:$dst, (v4i32 (or QPR:$src1, + (vnot16 QPR:$src2))))]>; // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - 
(outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, - "vmvn", "$dst, $src", "", - [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>; + (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, + "vmvn", "$dst, $src", "", + [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, - "vmvn", "$dst, $src", "", - [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>; -def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>; -def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>; + (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, + "vmvn", "$dst, $src", "", + [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>; +def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>; +def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD, - "vbsl", "$dst, $src2, $src3", "$src1 = $dst", - [(set DPR:$dst, - (v2i32 (or (and DPR:$src2, DPR:$src1), - (and DPR:$src3, (vnot_conv DPR:$src1)))))]>; + (ins DPR:$src1, DPR:$src2, DPR:$src3), + N3RegFrm, IIC_VCNTiD, + "vbsl", "$dst, $src2, $src3", "$src1 = $dst", + [(set DPR:$dst, + (v2i32 (or (and DPR:$src2, DPR:$src1), + (and DPR:$src3, (vnot8 DPR:$src1)))))]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ, - "vbsl", "$dst, $src2, $src3", "$src1 = $dst", - [(set QPR:$dst, - (v4i32 (or (and QPR:$src2, QPR:$src1), - (and QPR:$src3, (vnot_conv QPR:$src1)))))]>; + (ins QPR:$src1, QPR:$src2, QPR:$src3), + N3RegFrm, IIC_VCNTiQ, + "vbsl", "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, + (v4i32 (or (and QPR:$src2, QPR:$src1), + (and QPR:$src3, (vnot16 QPR:$src1)))))]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VBINiD, "vbif", 
"$dst, $src2, $src3", "$src1 = $dst", + N3RegFrm, IIC_VBINiD, + "vbif", "$dst, $src2, $src3", "$src1 = $dst", [/* For disassembly only; pattern left blank */]>; def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), - IIC_VBINiQ, "vbif", "$dst, $src2, $src3", "$src1 = $dst", + N3RegFrm, IIC_VBINiQ, + "vbif", "$dst, $src2, $src3", "$src1 = $dst", [/* For disassembly only; pattern left blank */]>; // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VBINiD, "vbit", "$dst, $src2, $src3", "$src1 = $dst", + N3RegFrm, IIC_VBINiD, + "vbit", "$dst, $src2, $src3", "$src1 = $dst", [/* For disassembly only; pattern left blank */]>; def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), - IIC_VBINiQ, "vbit", "$dst, $src2, $src3", "$src1 = $dst", + N3RegFrm, IIC_VBINiQ, + "vbit", "$dst, $src2, $src3", "$src1 = $dst", [/* For disassembly only; pattern left blank */]>; // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking @@ -2463,15 +2446,15 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // Vector Absolute Differences. 
// VABD : Vector Absolute Difference -defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vabd", "s", int_arm_neon_vabds, 0>; -defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, - IIC_VBINi4Q, IIC_VBINi4Q, +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vabd", "u", int_arm_neon_vabdu, 0>; -def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; -def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) @@ -2491,36 +2474,40 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. 
// VMAX : Vector Maximum -defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; -defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32", - v2f32, v2f32, int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32", - v4f32, v4f32, int_arm_neon_vmaxs, 1>; +defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vmax", "s", int_arm_neon_vmaxs, 1>; +defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vmax", "u", int_arm_neon_vmaxu, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax", + "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", + "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum -defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>; -defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32", - v2f32, v2f32, int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32", - v4f32, v4f32, int_arm_neon_vmins, 1>; +defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vmin", "s", int_arm_neon_vmins, 1>; +defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vmin", "u", int_arm_neon_vminu, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin", + "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 
0, N3RegFrm, IIC_VBINQ, "vmin", + "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. // VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8", - v8i8, v8i8, int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16", - v4i16, v4i16, int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32", - v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32", - v2f32, v2f32, int_arm_neon_vpadd, 0>; +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", + "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", + "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", + "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd", + "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", @@ -2535,36 +2522,36 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8", - v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16", - v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32", - v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8", - v8i8, v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16", - v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", 
"u32", - v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32", - v2f32, v2f32, int_arm_neon_vpmaxs, 0>; +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", + "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", + "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", + "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", + "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", + "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", + "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax", + "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8", - v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16", - v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32", - v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8", - v8i8, v8i8, int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16", - v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32", - v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32", - v2f32, v2f32, int_arm_neon_vpmins, 0>; +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", + "s8", v8i8, v8i8, 
int_arm_neon_vpmins, 0>; +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", + "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", + "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", + "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", + "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", + "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin", + "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. @@ -2583,10 +2570,10 @@ def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, v4f32, v4f32, int_arm_neon_vrecpe>; // VRECPS : Vector Reciprocal Step -def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSD, "vrecps", "f32", v2f32, v2f32, int_arm_neon_vrecps, 1>; -def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, IIC_VRECSQ, "vrecps", "f32", v4f32, v4f32, int_arm_neon_vrecps, 1>; @@ -2605,25 +2592,30 @@ def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, v4f32, v4f32, int_arm_neon_vrsqrte>; // VRSQRTS : Vector Reciprocal Square Root Step -def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSD, "vrsqrts", "f32", v2f32, v2f32, int_arm_neon_vrsqrts, 1>; -def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, IIC_VRECSQ, "vrsqrts", "f32", v4f32, v4f32, int_arm_neon_vrsqrts, 1>; // Vector Shifts. 
// VSHL : Vector Shift -defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>; -defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>; +defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm, + IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, + "vshl", "s", int_arm_neon_vshifts, 0>; +defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm, + IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, + "vshl", "u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl, + N2RegVShLFrm>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>; +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs, + N2RegVShRFrm>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru, + N2RegVShRFrm>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -2649,28 +2641,37 @@ defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; // VRSHL : Vector Rounding Shift -defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>; -defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>; +defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vrshl", "s", int_arm_neon_vrshifts, 0>; +defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vrshl", "u", 
int_arm_neon_vrshiftu, 0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs, + N2RegVShRFrm>; +defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru, + N2RegVShRFrm>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", NEONvrshrn>; // VQSHL : Vector Saturating Shift -defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>; -defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>; +defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqshl", "s", int_arm_neon_vqshifts, 0>; +defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqshl", "u", int_arm_neon_vqshiftu, 0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls, + N2RegVShLFrm>; +defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu, + N2RegVShLFrm>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu, + N2RegVShLFrm>; // VQSHRN : Vector Saturating Shift Right and Narrow defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", @@ -2683,12 +2684,12 @@ defm VQSHRUN : N2VNSh_HSD<1, 
1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl", "s", - int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl", "u", - int_arm_neon_vqrshiftu, 0>; +defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqrshl", "s", int_arm_neon_vqrshifts, 0>; +defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqrshl", "u", int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", @@ -2708,9 +2709,9 @@ defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; // VSLI : Vector Shift Left and Insert -defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>; +defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>; // VSRI : Vector Shift Right and Insert -defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>; +defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>; // Vector Absolute and Saturating Absolute. @@ -2732,19 +2733,22 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, // Vector Negate. 
-def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; -def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>; +def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; +def vneg8 : PatFrag<(ops node:$in), + (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>; +def vneg16 : PatFrag<(ops node:$in), + (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>; class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (Ty (vneg DPR:$src)))]>; + [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>; class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (Ty (vneg QPR:$src)))]>; + [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>; -// VNEG : Vector Negate +// VNEG : Vector Negate (integer) def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>; @@ -2762,12 +2766,12 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, "vneg", "f32", "$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; -def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>; -def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>; -def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>; -def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>; -def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>; -def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>; +def : Pat<(v8i8 (vneg8 DPR:$src)), (VNEGs8d DPR:$src)>; +def : Pat<(v4i16 (vneg8 DPR:$src)), (VNEGs16d DPR:$src)>; +def : Pat<(v2i32 (vneg8 DPR:$src)), (VNEGs32d DPR:$src)>; +def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>; +def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>; +def : 
Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>; // VQNEG : Vector Saturating Negate defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, @@ -2805,9 +2809,9 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // VMOV : Vector Move (Register) def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - IIC_VMOVD, "vmov", "$dst, $src", "", []>; + N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), - IIC_VMOVD, "vmov", "$dst, $src", "", []>; + N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; // VMOV : Vector Move (Immediate) @@ -3048,30 +3052,29 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), // VDUP : Vector Duplicate Lane (from scalar to all elements) -class VDUPLND<bits<2> op19_18, bits<2> op17_16, - string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, - (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src[$lane]", "", - [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; +class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, + ValueType Ty> + : NVDupLane<op19_16, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), + IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]", + [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; -class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, string Dt, +class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, - (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src[$lane]", "", - [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; + : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), + IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]", + [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), + imm:$lane)))]>; // 
Inst{19-16} is partially specified depending on the element size. -def VDUPLN8d : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>; -def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>; -def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>; -def VDUPLNfd : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>; -def VDUPLN8q : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>; -def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>; -def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>; -def VDUPLNfq : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>; +def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>; +def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>; +def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>; +def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>; +def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>; +def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>; +def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>; +def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>; def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -3233,15 +3236,15 @@ def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; class VEXTd<string OpcodeStr, string Dt, ValueType Ty> : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst), - (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, - OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", + (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm, + IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>; class VEXTq<string OpcodeStr, string Dt, ValueType Ty> : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst), - (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, - OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", + (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm, + IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", [(set QPR:$dst, (Ty (NEONvext (Ty 
QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>; @@ -3290,25 +3293,26 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; // VTBL : Vector Table Lookup def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$src), IIC_VTB1, + (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1, "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2, "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3, "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), + NVTBLFrm, IIC_VTB4, "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; @@ -3317,26 +3321,27 @@ def VTBL4 // VTBX : Vector Table Extension def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1, + (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1, "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2, + (ins DPR:$orig, 
DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2, "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3, + (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), + NVTBLFrm, IIC_VTBX3, "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4, + DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4, "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, @@ -3396,12 +3401,12 @@ def : N3VSPat<fmul, VMULfd_sfp>; //let neverHasSideEffects = 1 in //def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", -// v2f32, fmul, fadd>; +// v2f32, fmul, fadd>; //def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>; //let neverHasSideEffects = 1 in //def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", -// v2f32, fmul, fsub>; +// v2f32, fmul, fsub>; //def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>; // Vector Absolute used for single-precision FP @@ -3421,14 +3426,14 @@ def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>; // Vector Maximum used for single-precision FP let neverHasSideEffects = 1 in def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND, "vmax", "f32", "$dst, $src1, $src2", "", []>; def : N3VSPat<NEONfmax, VMAXfd_sfp>; // Vector Minimum used for single-precision FP let neverHasSideEffects = 1 in def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs 
DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND, "vmin", "f32", "$dst, $src1, $src2", "", []>; def : N3VSPat<NEONfmin, VMINfd_sfp>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index aca8230..0458389 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -545,7 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, // FP FMA Operations. // -def VMLAD : ADbI<0b11100, 0b00, 0, 0, +def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), @@ -558,7 +558,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VNMLSD : ADbI<0b11100, 0b01, 0, 0, +def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), @@ -571,7 +571,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VMLSD : ADbI<0b11100, 0b00, 1, 0, +def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), @@ -589,7 +589,7 @@ def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))), def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; -def VNMLAD : ADbI<0b11100, 0b01, 1, 0, +def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp 
index bdbec30..cb762a4 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -341,6 +341,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PReg = PMO.getReg(); unsigned PRegNum = PMO.isUndef() ? UINT_MAX : ARMRegisterInfo::getRegisterNumbering(PReg); + unsigned Count = 1; for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { int NewOffset = MemOps[i].Offset; @@ -350,11 +351,14 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, : ARMRegisterInfo::getRegisterNumbering(Reg); // AM4 - register numbers in ascending order. // AM5 - consecutive register numbers in ascending order. + // Can only do up to 16 double-word registers per insn. if (Reg != ARM::SP && NewOffset == Offset + (int)Size && - ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { + ((isAM4 && RegNum > PRegNum) + || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { Offset += Size; PRegNum = RegNum; + ++Count; } else { // Can't merge this in. Try merge the earlier ones first. 
MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 2dad7f1..9e55cd8 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -22,10 +22,6 @@ using namespace llvm; static cl::opt<bool> ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); -static cl::opt<bool> -UseNEONFP("arm-use-neon-fp", - cl::desc("Use NEON for single-precision FP"), - cl::init(false), cl::Hidden); static cl::opt<bool> UseMOVT("arm-use-movt", @@ -35,7 +31,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) : ARMArchVersion(V4) , ARMFPUType(None) - , UseNEONForSinglePrecisionFP(UseNEONFP) + , UseNEONForSinglePrecisionFP(false) + , SlowVMLx(false) , IsThumb(isT) , ThumbMode(Thumb1) , PostRAScheduler(false) @@ -115,14 +112,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (!isThumb() || hasThumb2()) PostRAScheduler = true; - - // Set CPU specific features. - if (CPUString == "cortex-a8") { - // On Cortex-a8, it's faster to perform some single-precision FP - // operations with NEON instructions. - if (UseNEONFP.getPosition() == 0) - UseNEONForSinglePrecisionFP = true; - } } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 2dc81a4..fa56a91 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -50,6 +50,10 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// SlowVMLx - If the VFP2 instructions are available, indicates whether + /// the VML[AS] instructions are slow (if so, don't use them). + bool SlowVMLx; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. 
bool IsThumb; @@ -119,6 +123,7 @@ protected: bool hasNEON() const { return ARMFPUType >= NEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } + bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 4a7a1e4..ba736e3 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -841,7 +841,7 @@ GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << getFunctionNumber() << '_' << uid << '_' << uid2 << "_set_" << MBB->getNumber(); - return OutContext.GetOrCreateTemporarySymbol(Name.str()); + return OutContext.GetOrCreateSymbol(Name.str()); } MCSymbol *ARMAsmPrinter:: @@ -849,7 +849,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { SmallString<60> Name; raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; - return OutContext.GetOrCreateTemporarySymbol(Name.str()); + return OutContext.GetOrCreateSymbol(Name.str()); } void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { @@ -1132,6 +1132,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); else // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info pointers + // need to be indirect and pc-rel. We accomplish this by using NLPs. + // However, sometimes the types are local to the file. So we need to + // fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), 4/*size*/, 0/*addrspace*/); @@ -1186,7 +1191,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // FIXME: MOVE TO SHARED PLACE. 
unsigned Id = (unsigned)MI->getOperand(2).getImm(); const char *Prefix = MAI->getPrivateGlobalPrefix(); - MCSymbol *Label =OutContext.GetOrCreateTemporarySymbol(Twine(Prefix) + MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix) + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id)); OutStreamer.EmitLabel(Label); diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp index 7cb305f..ab2b06b 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp @@ -75,7 +75,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const { #endif // Create a symbol for the name. - return Ctx.GetOrCreateTemporarySymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name.str()); } MCSymbol *ARMMCInstLower:: @@ -91,7 +91,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { #endif // Create a symbol for the name. - return Ctx.GetOrCreateTemporarySymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name.str()); } MCOperand ARMMCInstLower:: diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index c36fe63..7334259 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -46,10 +46,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, default: break; + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: case ARM::VLD2d8: case ARM::VLD2d16: case ARM::VLD2d32: - case ARM::VLD2d64: case ARM::VLD2LNd8: case ARM::VLD2LNd16: case ARM::VLD2LNd32: @@ -83,7 +86,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: - case ARM::VLD3d64: + case ARM::VLD1d64T: case ARM::VLD3LNd8: case ARM::VLD3LNd16: case ARM::VLD3LNd32: @@ -128,7 +131,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VLD4d8: case ARM::VLD4d16: case ARM::VLD4d32: 
- case ARM::VLD4d64: + case ARM::VLD1d64Q: case ARM::VLD4LNd8: case ARM::VLD4LNd16: case ARM::VLD4LNd32: @@ -170,10 +173,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; + case ARM::VST1q8: + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: case ARM::VST2d8: case ARM::VST2d16: case ARM::VST2d32: - case ARM::VST2d64: case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: @@ -207,7 +213,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: - case ARM::VST3d64: + case ARM::VST1d64T: case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: @@ -252,7 +258,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4d8: case ARM::VST4d16: case ARM::VST4d32: - case ARM::VST4d64: + case ARM::VST1d64Q: case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 57b65cf..85d5ca0 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -12,6 +12,9 @@ Reimplement 'select' in terms of 'SEL'. A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX +Interesting optimization for PIC codegen on arm-linux: +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129 + //===---------------------------------------------------------------------===// Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 29ae631..ad98839 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -200,6 +200,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // It's illegal to emit pop instruction without operands. 
if (NumRegs) MBB.insert(MI, &*MIB); + else + MF.DeleteMachineInstr(MIB); return true; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index e4abcdb..55163f9 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -69,7 +69,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == ARM::GPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -93,7 +93,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == ARM::GPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 39f0749..d539e08 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -301,7 +301,15 @@ bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TB bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. 
@@ -362,6 +370,11 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } if (I->getOpcode() != Alpha::BR && I->getOpcode() != Alpha::COND_BRANCH_I && I->getOpcode() != Alpha::COND_BRANCH_F) diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index e3c3993..2471688 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -65,23 +65,23 @@ def HI16 : SDNodeXForm<imm, [{ //===----------------------------------------------------------------------===// def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>; -def uimm3 : PatLeaf<(imm), [{return isUint<3>(N->getZExtValue());}]>; -def uimm4 : PatLeaf<(imm), [{return isUint<4>(N->getZExtValue());}]>; -def uimm5 : PatLeaf<(imm), [{return isUint<5>(N->getZExtValue());}]>; +def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>; +def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>; +def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>; def uimm5m2 : PatLeaf<(imm), [{ uint64_t value = N->getZExtValue(); - return value % 2 == 0 && isUint<5>(value); + return value % 2 == 0 && isUInt<5>(value); }]>; def uimm6m4 : PatLeaf<(imm), [{ uint64_t value = N->getZExtValue(); - return value % 4 == 0 && isUint<6>(value); + return value % 4 == 0 && isUInt<6>(value); }]>; def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>; def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>; -def uimm16 : PatLeaf<(imm), [{return isUint<16>(N->getZExtValue());}]>; +def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>; def ximm16 : PatLeaf<(imm), [{ int64_t value = N->getSExtValue(); @@ -610,8 +610,7 @@ def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb), "cc = !cc;", []>; def MOVECC_zext : 
F1<(outs D:$dst), (ins JustCC:$cc), - "$dst = $cc;", - [/*(set D:$dst, (zext JustCC:$cc))*/]>; + "$dst = $cc;", []>; def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc), "$dst = cc;", []>; @@ -859,17 +858,5 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)), (CALLa tglobaladdr:$dst)>; def : Pat<(BfinCall (i32 texternalsym:$dst)), (CALLa texternalsym:$dst)>; - -//def : Pat<(sext JustCC:$cc), -// (NEG (MOVECC_zext JustCC:$cc))>; -//def : Pat<(anyext JustCC:$cc), -// (MOVECC_zext JustCC:$cc)>; -def : Pat<(i16 (zext JustCC:$cc)), - (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>; -def : Pat<(i16 (sext JustCC:$cc)), - (EXTRACT_SUBREG (NEG (MOVECC_zext JustCC:$cc)), bfin_subreg_lo16)>; -def : Pat<(i16 (anyext JustCC:$cc)), - (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>; - def : Pat<(i16 (trunc D:$src)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>; diff --git a/lib/Target/Blackfin/BlackfinIntrinsics.td b/lib/Target/Blackfin/BlackfinIntrinsics.td index bf02cfe..ce21b08 100644 --- a/lib/Target/Blackfin/BlackfinIntrinsics.td +++ b/lib/Target/Blackfin/BlackfinIntrinsics.td @@ -21,14 +21,14 @@ let TargetPrefix = "bfin", isTarget = 1 in { // Execute csync instruction with workarounds def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">, - Intrinsic<[llvm_void_ty]>; + Intrinsic<[]>; // Execute ssync instruction with workarounds def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">, - Intrinsic<[llvm_void_ty]>; + Intrinsic<[]>; // Execute idle instruction with workarounds def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">, - Intrinsic<[llvm_void_ty]>; + Intrinsic<[]>; } diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index b39a342..84dc9ca 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -164,7 +164,7 @@ void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB, return; } - if (isUint<16>(value)) { + 
if (isUInt<16>(value)) { BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value); return; } @@ -255,13 +255,13 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(FIPos==1 && "Bad frame index operand"); MI.getOperand(FIPos).ChangeToRegister(BaseReg, false); MI.getOperand(FIPos+1).setImm(Offset); - if (isUint<6>(Offset)) { + if (isUInt<6>(Offset)) { MI.setDesc(TII.get(isStore ? BF::STORE32p_uimm6m4 : BF::LOAD32p_uimm6m4)); return 0; } - if (BaseReg == BF::FP && isUint<7>(-Offset)) { + if (BaseReg == BF::FP && isUInt<7>(-Offset)) { MI.setDesc(TII.get(isStore ? BF::STORE32fp_nimm7m4 : BF::LOAD32fp_nimm7m4)); diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index c960974..1f21511 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -15,7 +15,6 @@ #ifndef LLVM_TARGET_IBMCELLSPU_H #define LLVM_TARGET_IBMCELLSPU_H -#include "llvm/System/DataTypes.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -25,73 +24,7 @@ namespace llvm { FunctionPass *createSPUISelDag(SPUTargetMachine &TM); - /*--== Utility functions/predicates/etc used all over the place: --==*/ - //! Predicate test for a signed 10-bit value - /*! - \param Value The input value to be tested - - This predicate tests for a signed 10-bit value, returning the 10-bit value - as a short if true. 
- */ - template<typename T> - inline bool isS10Constant(T Value); - - template<> - inline bool isS10Constant<short>(short Value) { - int SExtValue = ((int) Value << (32 - 10)) >> (32 - 10); - return ((Value > 0 && Value <= (1 << 9) - 1) - || (Value < 0 && (short) SExtValue == Value)); - } - - template<> - inline bool isS10Constant<int>(int Value) { - return (Value >= -(1 << 9) && Value <= (1 << 9) - 1); - } - - template<> - inline bool isS10Constant<uint32_t>(uint32_t Value) { - return (Value <= ((1 << 9) - 1)); - } - - template<> - inline bool isS10Constant<int64_t>(int64_t Value) { - return (Value >= -(1 << 9) && Value <= (1 << 9) - 1); - } - - template<> - inline bool isS10Constant<uint64_t>(uint64_t Value) { - return (Value <= ((1 << 9) - 1)); - } - - //! Predicate test for an unsigned 10-bit value - /*! - \param Value The input value to be tested - - This predicate tests for an unsigned 10-bit value, returning the 10-bit value - as a short if true. - */ - inline bool isU10Constant(short Value) { - return (Value == (Value & 0x3ff)); - } - - inline bool isU10Constant(int Value) { - return (Value == (Value & 0x3ff)); - } - - inline bool isU10Constant(uint32_t Value) { - return (Value == (Value & 0x3ff)); - } - - inline bool isU10Constant(int64_t Value) { - return (Value == (Value & 0x3ff)); - } - - inline bool isU10Constant(uint64_t Value) { - return (Value == (Value & 0x3ff)); - } - extern Target TheCellSPUTarget; - } // Defines symbolic names for the SPU instructions. diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 396a921..90f8310 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -44,28 +44,28 @@ namespace { bool isI64IntS10Immediate(ConstantSDNode *CN) { - return isS10Constant(CN->getSExtValue()); + return isInt<10>(CN->getSExtValue()); } //! 
ConstantSDNode predicate for i32 sign-extended, 10-bit immediates bool isI32IntS10Immediate(ConstantSDNode *CN) { - return isS10Constant(CN->getSExtValue()); + return isInt<10>(CN->getSExtValue()); } //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values bool isI32IntU10Immediate(ConstantSDNode *CN) { - return isU10Constant(CN->getSExtValue()); + return isUInt<10>(CN->getSExtValue()); } //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values bool isI16IntS10Immediate(ConstantSDNode *CN) { - return isS10Constant(CN->getSExtValue()); + return isInt<10>(CN->getSExtValue()); } //! SDNode predicate for i16 sign-extended, 10-bit immediate values @@ -80,7 +80,7 @@ namespace { bool isI16IntU10Immediate(ConstantSDNode *CN) { - return isU10Constant((short) CN->getZExtValue()); + return isUInt<10>((short) CN->getZExtValue()); } //! SDNode predicate for i16 sign-extended, 10-bit immediate values diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index e863ee3..4b0d442 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1107,7 +1107,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); + unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); + SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, false, false, 0); Chain = Store.getOperand(0); @@ -1491,7 +1492,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, return SDValue(); Value = Value >> 32; } - if (isS10Constant(Value)) + if (isInt<10>(Value)) return DAG.getTargetConstant(Value, ValueType); } diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 2306665..86825c8 
100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -450,7 +450,15 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. @@ -513,6 +521,11 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { if (I == MBB.begin()) return 0; --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } if (!isCondBranch(I) && !isUncondBranch(I)) return 0; diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 5068f77..6d1f87d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1268,7 +1268,12 @@ multiclass BitwiseAnd defm AND : BitwiseAnd; -// N.B.: vnot_conv is one of those special target selection pattern fragments, + +def vnot_cell_conv : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>; + +// N.B.: vnot_cell_conv is one of those special target selection pattern +// fragments, // in which we expect there to be a bit_convert on the constant. Bear in mind // that llvm translates "not <reg>" to "xor <reg>, -1" (or in this case, a // constant -1 vector.) 
@@ -1301,7 +1306,7 @@ multiclass AndComplement def r8: ANDCRegInst<R8C>; // Sometimes, the xor pattern has a bitcast constant: - def v16i8_conv: ANDCVecInst<v16i8, vnot_conv>; + def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>; } defm ANDC : AndComplement; @@ -1934,7 +1939,7 @@ multiclass SelectBits def v16i8: SELBVecInst<v16i8>; def v8i16: SELBVecInst<v8i16>; def v4i32: SELBVecInst<v4i32>; - def v2i64: SELBVecInst<v2i64, vnot_conv>; + def v2i64: SELBVecInst<v2i64, vnot_cell_conv>; def r128: SELBRegInst<GPRC>; def r64: SELBRegInst<R64C>; @@ -4373,7 +4378,7 @@ def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>; def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>; def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>; -def : Pat<(v2f64 (bitconvert (v2f64 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>; def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))), (ORi128_vec VECREG:$src)>; diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 8c78bab..f3071f2 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -336,6 +337,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); + DebugLoc dl = II->getDebugLoc(); while (!MI.getOperand(i).isFI()) { ++i; @@ -364,11 +366,22 @@ 
SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // Replace the FrameIndex with base register with $sp (aka $r1) SPOp.ChangeToRegister(SPU::R1, false); - if (Offset > SPUFrameInfo::maxFrameOffset() - || Offset < SPUFrameInfo::minFrameOffset()) { - errs() << "Large stack adjustment (" - << Offset - << ") in SPURegisterInfo::eliminateFrameIndex."; + + // if 'Offset' doesn't fit to the D-form instruction's + // immediate, convert the instruction to X-form + // if the instruction is not an AI (which takes a s10 immediate), assume + // it is a load/store that can take a s14 immediate + if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset)) + || !isInt<14>(Offset)) { + int newOpcode = convertDFormToXForm(MI.getOpcode()); + unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj); + BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg ) + .addImm(Offset); + BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg()) + .addReg(tmpReg, RegState::Kill) + .addReg(SPU::R1); + // remove the replaced D-form instruction + MBB.erase(II); } else { MO.ChangeToImmediate(Offset); } @@ -423,6 +436,14 @@ void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MF.getRegInfo().setPhysRegUnused(SPU::R0); MF.getRegInfo().setPhysRegUnused(SPU::R1); MF.getRegInfo().setPhysRegUnused(SPU::R2); + + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &SPU::R32CRegClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + } void SPURegisterInfo::emitPrologue(MachineFunction &MF) const @@ -448,7 +469,8 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const assert((FrameSize & 0xf) == 0 && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); - if (FrameSize > 0 || MFI->hasCalls()) { + // the "empty" frame size is 16 - just the register scavenger spill slot + if (FrameSize > 16 || MFI->hasCalls()) { FrameSize = -(FrameSize + 
SPUFrameInfo::minStackSize()); if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. @@ -460,14 +482,14 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const // for the ABI BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16) .addReg(SPU::R1); - if (isS10Constant(FrameSize)) { + if (isInt<10>(FrameSize)) { // Spill $sp to adjusted $sp BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize) .addReg(SPU::R1); // Adjust $sp by required amout BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1) .addImm(FrameSize); - } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { + } else if (isInt<16>(FrameSize)) { // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use // $r2 to adjust $sp: BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) @@ -475,7 +497,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1) + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1) .addReg(SPU::R2) .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) @@ -549,9 +571,11 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const "Can only insert epilog into returning blocks"); assert((FrameSize & 0xf) == 0 && "SPURegisterInfo::emitEpilogue: FrameSize not aligned"); - if (FrameSize > 0 || MFI->hasCalls()) { + + // the "empty" frame size is 16 - just the register scavenger spill slot + if (FrameSize > 16 || MFI->hasCalls()) { FrameSize = FrameSize + SPUFrameInfo::minStackSize(); - if (isS10Constant(FrameSize + LinkSlotOffset)) { + if (isInt<10>(FrameSize + LinkSlotOffset)) { // Reload $lr, adjust $sp by required amount // Note: We do this to slightly improve dual issue -- not by much, but it // is an opportunity for dual issue. 
@@ -574,7 +598,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const .addReg(SPU::R2); BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) .addImm(16) - .addReg(SPU::R2); + .addReg(SPU::R1); BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2). addReg(SPU::R2) .addImm(16); @@ -618,4 +642,43 @@ SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } +int +SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const +{ + switch(dFormOpcode) + { + case SPU::AIr32: return SPU::Ar32; + case SPU::LQDr32: return SPU::LQXr32; + case SPU::LQDr128: return SPU::LQXr128; + case SPU::LQDv16i8: return SPU::LQXv16i8; + case SPU::LQDv4f32: return SPU::LQXv4f32; + case SPU::STQDr32: return SPU::STQXr32; + case SPU::STQDr128: return SPU::STQXr128; + case SPU::STQDv16i8: return SPU::STQXv16i8; + case SPU::STQDv4i32: return SPU::STQXv4i32; + case SPU::STQDv4f32: return SPU::STQXv4f32; + + default: assert( false && "Unhandled D to X-form conversion"); + } + // default will assert, but need to return something to keep the + // compiler happy. + return dFormOpcode; +} + +// TODO this is already copied from PPC. Could this convenience function +// be moved to the RegScavenger class? 
+unsigned +SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, + RegScavenger *RS, + const TargetRegisterClass *RC, + int SPAdj) const +{ + assert(RS && "Register scavenging must be on"); + unsigned Reg = RS->FindUnusedReg(RC); + if (Reg == 0) + Reg = RS->scavengeRegister(RC, II, SPAdj); + assert( Reg && "Register scavenger failed"); + return Reg; +} + #include "SPUGenRegisterInfo.inc" diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 48feb5c..0a70318 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -53,6 +53,10 @@ namespace llvm { virtual const TargetRegisterClass* const * getCalleeSavedRegClasses(const MachineFunction *MF) const; + //! Allow for scavenging, so we can get scratch registers when needed. + virtual bool requiresRegisterScavenging(const MachineFunction &MF) const + { return true; } + //! Return the reserved registers BitVector getReservedRegs(const MachineFunction &MF) const; @@ -97,6 +101,21 @@ namespace llvm { //! Get DWARF debugging register number int getDwarfRegNum(unsigned RegNum, bool isEH) const; + + //! Convert D-form load/store to X-form load/store + /*! + Converts a regiser displacement load/store into a register-indexed + load/store for large stack frames, when the stack frame exceeds the + range of a s10 displacement. + */ + int convertDFormToXForm(int dFormOpcode) const; + + //! Acquire an unused register in an emergency. 
+ unsigned findScratchRegister(MachineBasicBlock::iterator II, + RegScavenger *RS, + const TargetRegisterClass *RC, + int SPAdj) const; + }; } // end namespace llvm diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td index 76eb563..82552fa 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsics.td +++ b/lib/Target/MBlaze/MBlazeIntrinsics.td @@ -21,11 +21,11 @@ let TargetPrefix = "mblaze", isTarget = 1 in { [llvm_i32_ty], [IntrWriteMem]>; - class MBFSL_Put_Intrinsic : Intrinsic<[llvm_void_ty], + class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrWriteMem]>; - class MBFSL_PutT_Intrinsic : Intrinsic<[llvm_void_ty], + class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], [IntrWriteMem]>; } diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index ac41cc8..15d16ec 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -424,7 +424,7 @@ void MSILWriter::printPtrLoad(uint64_t N) { case Module::Pointer32: printSimpleInstruction("ldc.i4",utostr(N).c_str()); // FIXME: Need overflow test? - if (!isUInt32(N)) { + if (!isUInt<32>(N)) { errs() << "Value = " << utostr(N) << '\n'; llvm_unreachable("32-bit pointer overflowed"); } diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp index 32c6b04..f4d7d8a 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp @@ -59,7 +59,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const { } // Create a symbol for the name. - return Ctx.GetOrCreateTemporarySymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name.str()); } MCSymbol *MSP430MCInstLower:: @@ -75,7 +75,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { } // Create a symbol for the name. 
- return Ctx.GetOrCreateTemporarySymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name.str()); } MCOperand MSP430MCInstLower:: diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 6372482..e584770 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -173,6 +173,8 @@ unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; + if (I->isDebugValue()) + continue; if (I->getOpcode() != MSP430::JMP && I->getOpcode() != MSP430::JCC) break; @@ -241,6 +243,9 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; + if (I->isDebugValue()) + continue; + // Working from the bottom, when we see a non-terminator // instruction, we're done. if (!isUnpredicatedTerminator(I)) diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index fb93706..1d5c511 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -235,10 +235,7 @@ std::string Mangler::getNameWithPrefix(const GlobalValue *GV, MCSymbol *Mangler::getSymbol(const GlobalValue *GV) { SmallString<60> NameStr; getNameWithPrefix(NameStr, GV, false); - if (!GV->hasPrivateLinkage()) - return Context.GetOrCreateSymbol(NameStr.str()); - - return Context.GetOrCreateTemporarySymbol(NameStr.str()); + return Context.GetOrCreateSymbol(NameStr.str()); } diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index fa4518d..e948917 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -26,8 +26,9 @@ // Floating Point Compare and Branch def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; -def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<0>, - SDTCisInt<2>]>; +def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>, SDTCisFP<1>, + SDTCisInt<3>]>; def 
SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; @@ -244,12 +245,13 @@ def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; /// Floating Point Compare let hasDelaySlot = 1, Defs=[FCR31] in { def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc), - "c.$cc.s $fs, $ft", [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc), - (implicit FCR31)]>; + "c.$cc.s $fs, $ft", + [(set FCR31, (MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc))]>; def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc), - "c.$cc.d $fs, $ft", [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc), - (implicit FCR31)]>, Requires<[In32BitMode]>; + "c.$cc.d $fs, $ft", + [(set FCR31, (MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc))]>, + Requires<[In32BitMode]>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 1a9bffc..85cf064 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -433,7 +433,15 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. 
@@ -562,6 +570,11 @@ RemoveBranch(MachineBasicBlock &MBB) const MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } if (I->getOpcode() != Mips::J && GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID) return 0; diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 2fb405e..da16e83 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -226,6 +226,11 @@ bool PIC16InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Get the terminator instruction. --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return true; + --I; + } // Handle unconditional branches. If the unconditional branch's target is // successor basic block then remove the unconditional branch. if (I->getOpcode() == PIC16::br_uncond && AllowModify) { diff --git a/lib/Target/PIC16/PIC16Section.cpp b/lib/Target/PIC16/PIC16Section.cpp index a96ebb8..2505b11 100644 --- a/lib/Target/PIC16/PIC16Section.cpp +++ b/lib/Target/PIC16/PIC16Section.cpp @@ -17,10 +17,9 @@ using namespace llvm; // This is the only way to create a PIC16Section. Sections created here // do not need to be explicitly deleted as they are managed by auto_ptrs. -PIC16Section *PIC16Section::Create(const StringRef &Name, - PIC16SectionType Ty, - const std::string &Address, - int Color, MCContext &Ctx) { +PIC16Section *PIC16Section::Create(StringRef Name, PIC16SectionType Ty, + StringRef Address, int Color, + MCContext &Ctx) { /// Determine the internal SectionKind info. /// Users of PIC16Section class should not need to know the internal @@ -59,8 +58,17 @@ PIC16Section *PIC16Section::Create(const StringRef &Name, } + // Copy strings into context allocated memory so they get free'd when the + // context is destroyed. 
+ char *NameCopy = static_cast<char*>(Ctx.Allocate(Name.size(), 1)); + memcpy(NameCopy, Name.data(), Name.size()); + char *AddressCopy = static_cast<char*>(Ctx.Allocate(Address.size(), 1)); + memcpy(AddressCopy, Address.data(), Address.size()); + // Create the Section. - PIC16Section *S = new (Ctx) PIC16Section(Name, K, Address, Color); + PIC16Section *S = + new (Ctx) PIC16Section(StringRef(NameCopy, Name.size()), K, + StringRef(AddressCopy, Address.size()), Color); S->T = Ty; return S; } diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h index 566f920..9039ca7 100644 --- a/lib/Target/PIC16/PIC16Section.h +++ b/lib/Target/PIC16/PIC16Section.h @@ -30,11 +30,11 @@ namespace llvm { PIC16SectionType T; /// Name of the section to uniquely identify it. - std::string Name; + StringRef Name; /// User can specify an address at which a section should be placed. /// Negative value here means user hasn't specified any. - std::string Address; + StringRef Address; /// Overlay information - Sections with same color can be overlaid on /// one another. @@ -43,17 +43,16 @@ namespace llvm { /// Total size of all data objects contained here. unsigned Size; - PIC16Section(const StringRef &name, SectionKind K, const std::string &addr, - int color) + PIC16Section(StringRef name, SectionKind K, StringRef addr, int color) : MCSection(K), Name(name), Address(addr), Color(color), Size(0) { } public: /// Return the name of the section. - const std::string &getName() const { return Name; } + StringRef getName() const { return Name; } /// Return the Address of the section. - const std::string &getAddress() const { return Address; } + StringRef getAddress() const { return Address; } /// Return the Color of the section. int getColor() const { return Color; } @@ -64,6 +63,8 @@ namespace llvm { void setSize(unsigned size) { Size = size; } /// Conatined data objects. + // FIXME: This vector is leaked because sections are allocated with a + // BumpPtrAllocator. 
std::vector<const GlobalVariable *>Items; /// Check section type. @@ -77,8 +78,8 @@ namespace llvm { PIC16SectionType getType() const { return T; } /// This would be the only way to create a section. - static PIC16Section *Create(const StringRef &Name, PIC16SectionType Ty, - const std::string &Address, int Color, + static PIC16Section *Create(StringRef Name, PIC16SectionType Ty, + StringRef Address, int Color, MCContext &Ctx); /// Override this as PIC16 has its own way of printing switching diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index ed6fc9d..5adefd3 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -309,8 +309,8 @@ namespace { const MCSymbol *&TOCEntry = TOC[Sym]; if (TOCEntry == 0) TOCEntry = OutContext. - GetOrCreateTemporarySymbol(StringRef(MAI->getPrivateGlobalPrefix()) + - "C" + Twine(LabelID++)); + GetOrCreateSymbol(StringRef(MAI->getPrivateGlobalPrefix()) + + "C" + Twine(LabelID++)); O << *TOCEntry << "@toc"; } @@ -674,14 +674,14 @@ static const MCSymbol *GetLazyPtr(const MCSymbol *Sym, MCContext &Ctx) { // Remove $stub suffix, add $lazy_ptr. SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5); TmpStr += "$lazy_ptr"; - return Ctx.GetOrCreateTemporarySymbol(TmpStr.str()); + return Ctx.GetOrCreateSymbol(TmpStr.str()); } static const MCSymbol *GetAnonSym(const MCSymbol *Sym, MCContext &Ctx) { // Add $tmp suffix to $stub, yielding $stub$tmp. SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()); TmpStr += "$tmp"; - return Ctx.GetOrCreateTemporarySymbol(TmpStr.str()); + return Ctx.GetOrCreateSymbol(TmpStr.str()); } void PPCDarwinAsmPrinter:: @@ -811,6 +811,11 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); else // Internal to current translation unit. 
+ // + // When we place the LSDA into the TEXT section, the type info pointers + // need to be indirect and pc-rel. We accomplish this by using NLPs. + // However, sometimes the types are local to the file. So we need to + // fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index a752421..52948c8 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -130,7 +130,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { } // If this branch is in range, ignore it. - if (isInt16(BranchSize)) { + if (isInt<16>(BranchSize)) { MBBStartOffset += 4; continue; } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 9d79c0d..4f88d35d 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -470,11 +470,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, if (CC == ISD::SETEQ || CC == ISD::SETNE) { if (isInt32Immediate(RHS, Imm)) { // SETEQ/SETNE comparison with 16-bit immediate, fold it. - if (isUInt16(Imm)) + if (isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF)), 0); // If this is a 16-bit signed immediate, fold it. 
- if (isInt16((int)Imm)) + if (isInt<16>((int)Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF)), 0); @@ -494,7 +494,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, } Opc = PPC::CMPLW; } else if (ISD::isUnsignedIntSetCC(CC)) { - if (isInt32Immediate(RHS, Imm) && isUInt16(Imm)) + if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF)), 0); Opc = PPC::CMPLW; @@ -511,11 +511,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, if (CC == ISD::SETEQ || CC == ISD::SETNE) { if (isInt64Immediate(RHS.getNode(), Imm)) { // SETEQ/SETNE comparison with 16-bit immediate, fold it. - if (isUInt16(Imm)) + if (isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, getI32Imm(Imm & 0xFFFF)), 0); // If this is a 16-bit signed immediate, fold it. - if (isInt16(Imm)) + if (isInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI32Imm(Imm & 0xFFFF)), 0); @@ -528,7 +528,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, // xoris r0,r3,0x1234 // cmpldi cr0,r0,0x5678 // beq cr0,L6 - if (isUInt32(Imm)) { + if (isUInt<32>(Imm)) { SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, getI64Imm(Imm >> 16)), 0); return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, @@ -537,7 +537,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, } Opc = PPC::CMPLD; } else if (ISD::isUnsignedIntSetCC(CC)) { - if (isInt64Immediate(RHS.getNode(), Imm) && isUInt16(Imm)) + if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, getI64Imm(Imm & 0xFFFF)), 0); Opc = PPC::CMPLD; @@ -761,12 +761,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { unsigned Shift = 0; // If it can't be represented as a 32 bit value. 
- if (!isInt32(Imm)) { + if (!isInt<32>(Imm)) { Shift = CountTrailingZeros_64(Imm); int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; // If the shifted value fits 32 bits. - if (isInt32(ImmSh)) { + if (isInt<32>(ImmSh)) { // Go with the shifted value. Imm = ImmSh; } else { @@ -785,7 +785,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { unsigned Hi = (Imm >> 16) & 0xFFFF; // Simple value. - if (isInt16(Imm)) { + if (isInt<16>(Imm)) { // Just the Lo bits. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); } else if (Lo) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 2c072c1..e67666d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5539,8 +5539,16 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; } -EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr, +/// getOptimalMemOpType - Returns the target specific optimal type for load +/// and store operations as a result of memset, memcpy, and memmove lowering. +/// If DstAlign is zero that means it's safe to destination alignment can +/// satisfy any constraint. Similarly if SrcAlign is zero it means there +/// isn't a need to check it against alignment requirement, probably because +/// the source does not need to be loaded. It returns EVT::Other if +/// SelectionDAG should be responsible for determining it. 
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool SafeToUseFP, SelectionDAG &DAG) const { if (this->PPCSubTarget.isPPC64()) { return MVT::i64; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 9c390ac..19fefab 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -347,9 +347,16 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr, - SelectionDAG &DAG) const; + /// getOptimalMemOpType - Returns the target specific optimal type for load + /// and store operations as a result of memset, memcpy, and memmove lowering. + /// If DstAlign is zero that means it's safe to destination alignment can + /// satisfy any constraint. Similarly if SrcAlign is zero it means there + /// isn't a need to check it against alignment requirement, probably because + /// the source does not need to be loaded. It returns EVT::Other if + /// SelectionDAG should be responsible for determining it. + virtual EVT getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool SafeToUseFP, SelectionDAG &DAG) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 3ff8f27..256370f 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -15,6 +15,10 @@ // Altivec transformation functions and pattern fragments. // +// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be +// of that type. 
+def vnot_ppc : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>; def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ @@ -321,7 +325,8 @@ def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>; def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vandc $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (and (v4i32 VRRC:$vA), (vnot VRRC:$vB)))]>; + [(set VRRC:$vD, (and (v4i32 VRRC:$vA), + (vnot_ppc VRRC:$vB)))]>; def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vcfsx $vD, $vB, $UIMM", VecFP, @@ -435,7 +440,8 @@ def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>; def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vnor $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vnot (or (v4i32 VRRC:$vA), VRRC:$vB)))]>; + [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA), + VRRC:$vB)))]>; def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vor $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>; @@ -640,12 +646,11 @@ def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef), (VMRGHW VRRC:$vA, VRRC:$vA)>; // Logical Operations -def : Pat<(v4i32 (vnot VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>; -def : Pat<(v4i32 (vnot_conv VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>; +def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>; -def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))), +def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))), (VNOR VRRC:$A, VRRC:$B)>; -def : Pat<(v4i32 (and VRRC:$A, (vnot_conv VRRC:$B))), +def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))), (VANDC VRRC:$A, VRRC:$B)>; def : Pat<(fmul VRRC:$vA, VRRC:$vB), diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 9895bea..82c637e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -213,7 +213,15 @@ bool 
PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. @@ -281,6 +289,11 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC) return 0; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 6e7880e..44c5fe6 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -512,7 +512,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineInstr *MI = I; DebugLoc dl = MI->getDebugLoc(); - if (isInt16(CalleeAmt)) { + if (isInt<16>(CalleeAmt)) { BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg). addImm(CalleeAmt); } else { @@ -596,7 +596,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, else Reg = PPC::R0; - if (MaxAlign < TargetAlign && isInt16(FrameSize)) { + if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) .addImm(FrameSize); @@ -798,7 +798,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // clear can be encoded. This is extremely uncommon, because normally you // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. 
- if (isInt16(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -1375,8 +1375,9 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { if (!isPPC64) { // PPC32. if (ALIGN_STACK && MaxAlign > TargetAlign) { - assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!"); - assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!"); + assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && + "Invalid alignment!"); + assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) .addReg(PPC::R1) @@ -1390,7 +1391,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R1) .addReg(PPC::R1) .addReg(PPC::R0); - } else if (isInt16(NegFrameSize)) { + } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) .addReg(PPC::R1) .addImm(NegFrameSize) @@ -1408,8 +1409,9 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { } } else { // PPC64. 
if (ALIGN_STACK && MaxAlign > TargetAlign) { - assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!"); - assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!"); + assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && + "Invalid alignment!"); + assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) .addReg(PPC::X1) @@ -1422,7 +1424,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X1) .addReg(PPC::X1) .addReg(PPC::X0); - } else if (isInt16(NegFrameSize)) { + } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) .addReg(PPC::X1) .addImm(NegFrameSize / 4) @@ -1591,7 +1593,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the // value of R31 in this case. - if (FI->hasFastCall() && isInt16(FrameSize)) { + if (FI->hasFastCall() && isInt<16>(FrameSize)) { assert(hasFP(MF) && "Expecting a valid the frame pointer."); BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) .addReg(PPC::R31).addImm(FrameSize); @@ -1605,7 +1607,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, .addReg(PPC::R1) .addReg(PPC::R31) .addReg(PPC::R0); - } else if (isInt16(FrameSize) && + } else if (isInt<16>(FrameSize) && (!ALIGN_STACK || TargetAlign >= MaxAlign) && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) @@ -1615,7 +1617,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, .addImm(0).addReg(PPC::R1); } } else { - if (FI->hasFastCall() && isInt16(FrameSize)) { + if (FI->hasFastCall() && isInt<16>(FrameSize)) { assert(hasFP(MF) && "Expecting a valid the frame pointer."); BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) .addReg(PPC::X31).addImm(FrameSize); @@ -1629,7 +1631,7 @@ void 
PPCRegisterInfo::emitEpilogue(MachineFunction &MF, .addReg(PPC::X1) .addReg(PPC::X31) .addReg(PPC::X0); - } else if (isInt16(FrameSize) && TargetAlign >= MaxAlign && + } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign && !MFI->hasVarSizedObjects()) { BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) .addReg(PPC::X1).addImm(FrameSize); @@ -1678,7 +1680,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS; unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI; - if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) { + if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg) .addReg(StackReg).addImm(CallerAllocatedAmt); } else { diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index f46840c..8c5e905 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -316,19 +316,19 @@ def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src), let Defs = [PSW] in { def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2), "cebr\t$src1, $src2", - [(SystemZcmp FP32:$src1, FP32:$src2), (implicit PSW)]>; + [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>; def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2), "cdbr\t$src1, $src2", - [(SystemZcmp FP64:$src1, FP64:$src2), (implicit PSW)]>; + [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>; def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2), "ceb\t$src1, $src2", - [(SystemZcmp FP32:$src1, (load rriaddr12:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZcmp FP32:$src1, + (load rriaddr12:$src2)))]>; def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2), "cdb\t$src1, $src2", - [(SystemZcmp FP64:$src1, (load rriaddr12:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZcmp FP64:$src1, + (load rriaddr12:$src2)))]>; } // Defs = [PSW] 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 5fa7e8c..06f01e7 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -424,6 +424,8 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; + if (I->isDebugValue()) + continue; // Working from the bottom, when we see a non-terminator // instruction, we're done. if (!isUnpredicatedTerminator(I)) @@ -500,6 +502,8 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; + if (I->isDebugValue()) + continue; if (I->getOpcode() != SystemZ::JMP && getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID) break; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 0d1af23..22bde4e 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -31,7 +31,8 @@ class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>; def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>; def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>; -def SDT_CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>, + SDTCisSameAs<1, 2>]>; def SDT_BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisI8<1>, SDTCisVT<2, i64>]>; @@ -980,100 +981,89 @@ let Defs = [PSW] in { def CMP32rr : RRI<0x19, (outs), (ins GR32:$src1, GR32:$src2), "cr\t$src1, $src2", - [(SystemZcmp GR32:$src1, GR32:$src2), - (implicit PSW)]>; + [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>; def CMP64rr : RREI<0xB920, (outs), (ins GR64:$src1, GR64:$src2), "cgr\t$src1, $src2", - [(SystemZcmp GR64:$src1, GR64:$src2), - (implicit PSW)]>; + [(set PSW, (SystemZcmp 
GR64:$src1, GR64:$src2))]>; def CMP32ri : RILI<0xC2D, (outs), (ins GR32:$src1, s32imm:$src2), "cfi\t$src1, $src2", - [(SystemZcmp GR32:$src1, imm:$src2), - (implicit PSW)]>; + [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>; def CMP64ri32 : RILI<0xC2C, (outs), (ins GR64:$src1, s32imm64:$src2), "cgfi\t$src1, $src2", - [(SystemZcmp GR64:$src1, i64immSExt32:$src2), - (implicit PSW)]>; + [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>; def CMP32rm : RXI<0x59, (outs), (ins GR32:$src1, rriaddr12:$src2), "c\t$src1, $src2", - [(SystemZcmp GR32:$src1, (load rriaddr12:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>; def CMP32rmy : RXYI<0xE359, (outs), (ins GR32:$src1, rriaddr:$src2), "cy\t$src1, $src2", - [(SystemZcmp GR32:$src1, (load rriaddr:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>; def CMP64rm : RXYI<0xE320, (outs), (ins GR64:$src1, rriaddr:$src2), "cg\t$src1, $src2", - [(SystemZcmp GR64:$src1, (load rriaddr:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>; def UCMP32rr : RRI<0x15, (outs), (ins GR32:$src1, GR32:$src2), "clr\t$src1, $src2", - [(SystemZucmp GR32:$src1, GR32:$src2), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>; def UCMP64rr : RREI<0xB921, (outs), (ins GR64:$src1, GR64:$src2), "clgr\t$src1, $src2", - [(SystemZucmp GR64:$src1, GR64:$src2), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>; def UCMP32ri : RILI<0xC2F, (outs), (ins GR32:$src1, i32imm:$src2), "clfi\t$src1, $src2", - [(SystemZucmp GR32:$src1, imm:$src2), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>; def UCMP64ri32 : RILI<0xC2E, (outs), (ins GR64:$src1, i64i32imm:$src2), "clgfi\t$src1, $src2", - [(SystemZucmp GR64:$src1, i64immZExt32:$src2), - (implicit PSW)]>; + [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>; def UCMP32rm : RXI<0x55, (outs), (ins GR32:$src1, 
rriaddr12:$src2), "cl\t$src1, $src2", - [(SystemZucmp GR32:$src1, (load rriaddr12:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR32:$src1, + (load rriaddr12:$src2)))]>; def UCMP32rmy : RXYI<0xE355, (outs), (ins GR32:$src1, rriaddr:$src2), "cly\t$src1, $src2", - [(SystemZucmp GR32:$src1, (load rriaddr:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR32:$src1, + (load rriaddr:$src2)))]>; def UCMP64rm : RXYI<0xE351, (outs), (ins GR64:$src1, rriaddr:$src2), "clg\t$src1, $src2", - [(SystemZucmp GR64:$src1, (load rriaddr:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR64:$src1, + (load rriaddr:$src2)))]>; def CMPSX64rr32 : RREI<0xB930, (outs), (ins GR64:$src1, GR32:$src2), "cgfr\t$src1, $src2", - [(SystemZucmp GR64:$src1, (sext GR32:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR64:$src1, + (sext GR32:$src2)))]>; def UCMPZX64rr32 : RREI<0xB931, (outs), (ins GR64:$src1, GR32:$src2), "clgfr\t$src1, $src2", - [(SystemZucmp GR64:$src1, (zext GR32:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR64:$src1, + (zext GR32:$src2)))]>; def CMPSX64rm32 : RXYI<0xE330, (outs), (ins GR64:$src1, rriaddr:$src2), "cgf\t$src1, $src2", - [(SystemZucmp GR64:$src1, (sextloadi64i32 rriaddr:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR64:$src1, + (sextloadi64i32 rriaddr:$src2)))]>; def UCMPZX64rm32 : RXYI<0xE331, (outs), (ins GR64:$src1, rriaddr:$src2), "clgf\t$src1, $src2", - [(SystemZucmp GR64:$src1, (zextloadi64i32 rriaddr:$src2)), - (implicit PSW)]>; + [(set PSW, (SystemZucmp GR64:$src1, + (zextloadi64i32 rriaddr:$src2)))]>; // FIXME: Add other crazy ucmp forms diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index c3dcf8e..66bb914 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -524,6 +524,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); else // Internal to 
current translation unit. + // + // When we place the LSDA into the TEXT section, the type info + // pointers need to be indirect and pc-rel. We accomplish this by + // using NLPs. However, sometimes the types are local to the file. So + // we need to fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), 4/*size*/, 0/*addrspace*/); diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 7d29d97..c851ca3 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -83,7 +83,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { case X86II::MO_DARWIN_NONLAZY: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { Name += "$non_lazy_ptr"; - MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str()); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI().getGVStubEntry(Sym); @@ -98,7 +98,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { } case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: { Name += "$non_lazy_ptr"; - MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str()); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); if (StubSym.getPointer() == 0) { @@ -112,7 +112,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { } case X86II::MO_DARWIN_STUB: { Name += "$stub"; - MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str()); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI().getFnStubEntry(Sym); if (StubSym.getPointer()) @@ -127,7 +127,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { Name.erase(Name.end()-5, Name.end()); StubSym = MachineModuleInfoImpl:: - StubValueTy(Ctx.GetOrCreateTemporarySymbol(Name.str()), false); + 
StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false); } return Sym; } @@ -287,7 +287,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break; case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; + case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; + case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; + case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; case X86::MOV16r0: diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 4d3dedf..22285f1 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) set(sources + SSEDomainFix.cpp X86AsmBackend.cpp X86CodeEmitter.cpp X86COFFMachineModuleInfo.cpp diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp new file mode 100644 index 0000000..395ab57 --- /dev/null +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -0,0 +1,499 @@ +//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SSEDomainFix pass. +// +// Some SSE instructions like mov, and, or, xor are available in different +// variants for different operand types. These variant instructions are +// equivalent, but on Nehalem and newer cpus there is extra latency +// transferring data between integer and floating point domains. 
+// +// This pass changes the variant instructions to minimize domain crossings. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sse-domain-fix" +#include "X86InstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +/// Allocate objects from a pool, allow objects to be recycled, and provide a +/// way of deleting everything. +template<typename T, unsigned PageSize = 64> +class PoolAllocator { + std::vector<T*> Pages, Avail; +public: + ~PoolAllocator() { Clear(); } + + T* Alloc() { + if (Avail.empty()) { + T *p = new T[PageSize]; + Pages.push_back(p); + Avail.reserve(PageSize); + for (unsigned n = 0; n != PageSize; ++n) + Avail.push_back(p+n); + } + T *p = Avail.back(); + Avail.pop_back(); + return p; + } + + // Allow object to be reallocated. It won't be reconstructed. + void Recycle(T *p) { + p->clear(); + Avail.push_back(p); + } + + // Destroy all objects, make sure there are no external pointers to them. + void Clear() { + Avail.clear(); + while (!Pages.empty()) { + delete[] Pages.back(); + Pages.pop_back(); + } + } +}; + +/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track +/// of execution domains. +/// +/// An open DomainValue represents a set of instructions that can still switch +/// execution domain. Multiple registers may refer to the same open +/// DomainValue - they will eventually be collapsed to the same execution +/// domain. +/// +/// A collapsed DomainValue represents a single register that has been forced +/// into one or more execution domains. There is a separate collapsed +/// DomainValue for each register, but it may contain multiple execution +/// domains. 
A register value is initially created in a single execution +/// domain, but if we were forced to pay the penalty of a domain crossing, we +/// keep track of the fact that the register is now available in multiple +/// domains. +struct DomainValue { + // Basic reference counting. + unsigned Refs; + + // Available domains. For an open DomainValue, it is the still possible + // domains for collapsing. For a collapsed DomainValue it is the domains where + // the register is available for free. + unsigned Mask; + + // Position of the last defining instruction. + unsigned Dist; + + // Twiddleable instructions using or defining these registers. + SmallVector<MachineInstr*, 8> Instrs; + + // Collapsed DomainValue have no instructions to twiddle - it simply keeps + // track of the domains where the registers are already available. + bool collapsed() const { return Instrs.empty(); } + + // Is any domain in mask available? + bool compat(unsigned mask) const { + return Mask & mask; + } + + // Mark domain as available. + void add(unsigned domain) { + Mask |= 1u << domain; + } + + // First domain available in mask. 
+ unsigned firstDomain() const { + return CountTrailingZeros_32(Mask); + } + + DomainValue() { clear(); } + + void clear() { + Refs = Mask = Dist = 0; + Instrs.clear(); + } +}; + +static const unsigned NumRegs = 16; + +class SSEDomainFixPass : public MachineFunctionPass { + static char ID; + PoolAllocator<DomainValue> Pool; + + MachineFunction *MF; + const X86InstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineBasicBlock *MBB; + DomainValue **LiveRegs; + typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap; + LiveOutMap LiveOuts; + unsigned Distance; + +public: + SSEDomainFixPass() : MachineFunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "SSE execution domain fixup"; + } + +private: + // Register mapping. + int RegIndex(unsigned Reg); + + // LiveRegs manipulations. + void SetLiveReg(int rx, DomainValue *DV); + void Kill(int rx); + void Force(int rx, unsigned domain); + void Collapse(DomainValue *dv, unsigned domain); + bool Merge(DomainValue *A, DomainValue *B); + + void enterBasicBlock(); + void visitGenericInstr(MachineInstr*); + void visitSoftInstr(MachineInstr*, unsigned mask); + void visitHardInstr(MachineInstr*, unsigned domain); +}; +} + +char SSEDomainFixPass::ID = 0; + +/// Translate TRI register number to an index into our smaller tables of +/// interesting registers. Return -1 for boring registers. +int SSEDomainFixPass::RegIndex(unsigned reg) { + // Registers are sorted lexicographically. + // We just need them to be consecutive, ordering doesn't matter. + assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); + reg -= X86::XMM0; + return reg < NumRegs ? reg : -1; +} + +/// Set LiveRegs[rx] = dv, updating reference counts. 
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + if (!LiveRegs) + LiveRegs = (DomainValue**)calloc(sizeof(DomainValue*), NumRegs); + + if (LiveRegs[rx] == dv) + return; + if (LiveRegs[rx]) { + assert(LiveRegs[rx]->Refs && "Bad refcount"); + if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]); + } + LiveRegs[rx] = dv; + if (dv) ++dv->Refs; +} + +// Kill register rx, recycle or collapse any DomainValue. +void SSEDomainFixPass::Kill(int rx) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + if (!LiveRegs || !LiveRegs[rx]) return; + + // Before killing the last reference to an open DomainValue, collapse it to + // the first available domain. + if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed()) + Collapse(LiveRegs[rx], LiveRegs[rx]->firstDomain()); + else + SetLiveReg(rx, 0); +} + +/// Force register rx into domain. +void SSEDomainFixPass::Force(int rx, unsigned domain) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + DomainValue *dv; + if (LiveRegs && (dv = LiveRegs[rx])) { + if (dv->collapsed()) + dv->add(domain); + else + Collapse(dv, domain); + } else { + // Set up basic collapsed DomainValue. + dv = Pool.Alloc(); + dv->Dist = Distance; + dv->add(domain); + SetLiveReg(rx, dv); + } +} + +/// Collapse open DomainValue into given domain. If there are multiple +/// registers using dv, they each get a unique collapsed DomainValue. +void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) { + assert(dv->compat(1u << domain) && "Cannot collapse"); + + // Collapse all the instructions. + while (!dv->Instrs.empty()) { + MachineInstr *mi = dv->Instrs.back(); + TII->SetSSEDomain(mi, domain); + dv->Instrs.pop_back(); + } + dv->Mask = 1u << domain; + + // If there are multiple users, give them new, unique DomainValues. 
+ if (LiveRegs && dv->Refs > 1) { + for (unsigned rx = 0; rx != NumRegs; ++rx) + if (LiveRegs[rx] == dv) { + DomainValue *dv2 = Pool.Alloc(); + dv2->Dist = Distance; + dv2->add(domain); + SetLiveReg(rx, dv2); + } + } +} + +/// Merge - All instructions and registers in B are moved to A, and B is +/// released. +bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { + assert(!A->collapsed() && "Cannot merge into collapsed"); + assert(!B->collapsed() && "Cannot merge from collapsed"); + if (A == B) + return true; + if (!A->compat(B->Mask)) + return false; + A->Mask &= B->Mask; + A->Dist = std::max(A->Dist, B->Dist); + A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + for (unsigned rx = 0; rx != NumRegs; ++rx) + if (LiveRegs[rx] == B) + SetLiveReg(rx, A); + return true; +} + +void SSEDomainFixPass::enterBasicBlock() { + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(), + e = MBB->livein_end(); i != e; ++i) { + int rx = RegIndex(*i); + if (rx < 0) continue; + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) continue; + DomainValue *pdv = fi->second[rx]; + if (!pdv) continue; + if (!LiveRegs || !LiveRegs[rx]) { + SetLiveReg(rx, pdv); + continue; + } + + // We have a live DomainValue from more than one predecessor. + if (LiveRegs[rx]->collapsed()) { + // We are already collapsed, but predecessor is not. Force him. + if (!pdv->collapsed()) + Collapse(pdv, LiveRegs[rx]->firstDomain()); + continue; + } + + // Currently open, merge in predecessor. + if (!pdv->collapsed()) + Merge(LiveRegs[rx], pdv); + else + Collapse(LiveRegs[rx], pdv->firstDomain()); + } + } +} + +// A hard instruction only works in one domain. All input registers will be +// forced into that domain. 
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) { + // Collapse all uses. + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Force(rx, domain); + } + + // Kill all defs and force them. + for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Kill(rx); + Force(rx, domain); + } +} + +// A soft instruction can be changed to work in other domains given by mask. +void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) { + // Scan the explicit use operands for incoming domains. + unsigned collmask = mask; + SmallVector<int, 4> used; + if (LiveRegs) + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + if (DomainValue *dv = LiveRegs[rx]) { + // Is it possible to use this collapsed register for free? + if (dv->collapsed()) { + if (unsigned m = collmask & dv->Mask) + collmask = m; + } else if (dv->compat(collmask)) + used.push_back(rx); + else + Kill(rx); + } + } + + // If the collapsed operands force a single domain, propagate the collapse. + if (isPowerOf2_32(collmask)) { + unsigned domain = CountTrailingZeros_32(collmask); + TII->SetSSEDomain(mi, domain); + visitHardInstr(mi, domain); + return; + } + + // Kill off any remaining uses that don't match collmask, and build a list of + // incoming DomainValue that we want to merge. 
+ SmallVector<DomainValue*,4> doms; + for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + int rx = *i; + DomainValue *dv = LiveRegs[rx]; + // This useless DomainValue could have been missed above. + if (!dv->compat(collmask)) { + Kill(*i); + continue; + } + // sorted, uniqued insert. + bool inserted = false; + for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end(); + i != e && !inserted; ++i) { + if (dv == *i) + inserted = true; + else if (dv->Dist < (*i)->Dist) { + inserted = true; + doms.insert(i, dv); + } + } + if (!inserted) + doms.push_back(dv); + } + + // doms are now sorted in order of appearance. Try to merge them all, giving + // priority to the latest ones. + DomainValue *dv = 0; + while (!doms.empty()) { + if (!dv) { + dv = doms.pop_back_val(); + continue; + } + + DomainValue *ThisDV = doms.pop_back_val(); + if (Merge(dv, ThisDV)) continue; + + for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i) + if (LiveRegs[*i] == ThisDV) + Kill(*i); + } + + // dv is the DomainValue we are going to use for this instruction. + if (!dv) + dv = Pool.Alloc(); + dv->Dist = Distance; + dv->Mask = collmask; + dv->Instrs.push_back(mi); + + // Finally set all defs and non-collapsed uses to dv. + for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { + Kill(rx); + SetLiveReg(rx, dv); + } + } +} + +void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) { + // Process explicit defs, kill any XMM registers redefined. 
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + int rx = RegIndex(mo.getReg()); + if (rx < 0) continue; + Kill(rx); + } +} + +bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo()); + TRI = MF->getTarget().getRegisterInfo(); + MBB = 0; + LiveRegs = 0; + Distance = 0; + assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass"); + + // If no XMM registers are used in the function, we can skip it completely. + bool anyregs = false; + for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(), + E = X86::VR128RegClass.end(); I != E; ++I) + if (MF->getRegInfo().isPhysRegUsed(*I)) { + anyregs = true; + break; + } + if (!anyregs) return false; + + MachineBasicBlock *Entry = MF->begin(); + SmallPtrSet<MachineBasicBlock*, 16> Visited; + for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> > + DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); + DFI != DFE; ++DFI) { + MBB = *DFI; + enterBasicBlock(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + MachineInstr *mi = I; + if (mi->isDebugValue()) continue; + ++Distance; + std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi); + if (domp.first) + if (domp.second) + visitSoftInstr(mi, domp.second); + else + visitHardInstr(mi, domp.first); + else if (LiveRegs) + visitGenericInstr(mi); + } + + // Save live registers at end of MBB - used by enterBasicBlock(). + if (LiveRegs) + LiveOuts.insert(std::make_pair(MBB, LiveRegs)); + LiveRegs = 0; + } + + // Clear the LiveOuts vectors. Should we also collapse any remaining + // DomainValues? 
+ for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); + i != e; ++i) + free(i->second); + LiveOuts.clear(); + Pool.Clear(); + + return false; +} + +FunctionPass *llvm::createSSEDomainFixPass() { + return new SSEDomainFixPass(); +} diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index c753cf2..9be38a4 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -41,6 +41,10 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM, /// FunctionPass *createX86FloatingPointStackifierPass(); +/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain +/// crossings. +FunctionPass *createSSEDomainFixPass(); + /// createX87FPRegKillInserterPass - This function returns a pass which /// inserts FP_REG_KILL instructions where needed. /// diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 2be51e1..6b62795 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -59,6 +59,9 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", [FeatureCMOV]>; def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; +def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", + "IsUAMemFast", "true", + "Fast unaligned memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions">; @@ -98,8 +101,10 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; -def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, + FeatureFastUAMem]>; +def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, + FeatureFastUAMem]>; // Sandy 
Bridge does not have FMA def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; @@ -160,10 +165,11 @@ def X86InstrInfo : InstrInfo { "hasAdSizePrefix", "Prefix", "hasREX_WPrefix", - "ImmTypeBits", - "FPFormBits", + "ImmT.Value", + "FPForm.Value", "hasLockPrefix", "SegOvrBits", + "ExeDomain.Value", "Opcode"]; let TSFlagsShifts = [0, 6, @@ -174,6 +180,7 @@ def X86InstrInfo : InstrInfo { 16, 19, 20, + 22, 24]; } diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 754a200..8e2928c3 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -10,10 +10,14 @@ #include "llvm/Target/TargetAsmBackend.h" #include "X86.h" #include "X86FixupKinds.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MachObjectWriter.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; @@ -48,8 +52,135 @@ public: for (unsigned i = 0; i != Size; ++i) DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8)); } + + bool MayNeedRelaxation(const MCInst &Inst, + const SmallVectorImpl<MCAsmFixup> &Fixups) const; + + void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const; + + bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; +static unsigned getRelaxedOpcode(unsigned Op) { + switch (Op) { + default: + return Op; + + case X86::JAE_1: return X86::JAE_4; + case X86::JA_1: return X86::JA_4; + case X86::JBE_1: return X86::JBE_4; + case X86::JB_1: return X86::JB_4; + case X86::JE_1: return X86::JE_4; + case X86::JGE_1: return X86::JGE_4; + case X86::JG_1: return X86::JG_4; + case X86::JLE_1: return X86::JLE_4; + case X86::JL_1: return X86::JL_4; + case X86::JMP_1: return X86::JMP_4; + case X86::JNE_1: return X86::JNE_4; + case 
X86::JNO_1: return X86::JNO_4; + case X86::JNP_1: return X86::JNP_4; + case X86::JNS_1: return X86::JNS_4; + case X86::JO_1: return X86::JO_4; + case X86::JP_1: return X86::JP_4; + case X86::JS_1: return X86::JS_4; + } +} + +bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst, + const SmallVectorImpl<MCAsmFixup> &Fixups) const { + // Check for a 1byte pcrel fixup, and enforce that we would know how to relax + // this instruction. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) { + assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()); + return true; + } + } + + return false; +} + +// FIXME: Can tblgen help at all here to verify there aren't other instructions +// we can relax? +void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF, + MCInst &Res) const { + // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. + unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode()); + + if (RelaxedOp == IF->getInst().getOpcode()) { + SmallString<256> Tmp; + raw_svector_ostream OS(Tmp); + IF->getInst().dump_pretty(OS); + llvm_report_error("unexpected instruction to relax: " + OS.str()); + } + + Res = IF->getInst(); + Res.setOpcode(RelaxedOp); +} + +/// WriteNopData - Write \arg Count bytes of nops to the output file, using an +/// optimal multi-byte sequence for the first 15 bytes and single-byte nops for +/// any remainder. Always returns true. +/// +/// FIXME this is X86 32-bit specific and should move to a better place. 
+bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + static const uint8_t Nops[16][16] = { + // nop + {0x90}, + // xchg %ax,%ax + {0x66, 0x90}, + // nopl (%[re]ax) + {0x0f, 0x1f, 0x00}, + // nopl 0(%[re]ax) + {0x0f, 0x1f, 0x40, 0x00}, + // nopl 0(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopl 0L(%[re]ax) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopw 0L(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopw %cs:0L(%[re]ax,%[re]ax,1) + {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopl 0(%[re]ax,%[re]ax,1) + // nopw 0(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + // nopw 0(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + // nopl 0L(%[re]ax) */ + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax) + // nopl 0L(%[re]ax) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax) + // nopl 0L(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00} + }; + + // Write an optimal sequence for the first 15 bytes. + uint64_t OptimalCount = (Count < 16) ? Count : 15; + for (uint64_t i = 0, e = OptimalCount; i != e; i++) + OW->Write8(Nops[OptimalCount - 1][i]); + + // Finish with single byte nops. 
+ for (uint64_t i = OptimalCount, e = Count; i != e; ++i) + OW->Write8(0x90); + + return true; +} + +/* *** */ + class ELFX86AsmBackend : public X86AsmBackend { public: ELFX86AsmBackend(const Target &T) diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 5d3edbb..c69eeb3 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -383,7 +383,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); // They have to fit in the 32-bit signed displacement field though. - if (isInt32(Disp)) { + if (isInt<32>(Disp)) { AM.Disp = (uint32_t)Disp; return X86SelectAddress(U->getOperand(0), AM); } @@ -427,7 +427,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } } // Check for displacement overflow. - if (!isInt32(Disp)) + if (!isInt<32>(Disp)) break; // Ok, the GEP indices were covered by constant-offset and scaled-index // addressing. Update the address state and move on to examining the base. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 704f9c6..b24d5a1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -802,6 +802,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (!VT.is128BitVector()) { continue; } + setOperationAction(ISD::AND, SVT, Promote); AddPromotedToType (ISD::AND, SVT, MVT::v2i64); setOperationAction(ISD::OR, SVT, Promote); @@ -1008,7 +1009,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: These should be based on subtarget info. Plus, the values should // be smaller when we are in optimizing for size mode. 
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores - maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores + maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores setPrefLoopAlignment(16); benefitFromCodePlacementOpt = true; @@ -1066,23 +1067,37 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { } /// getOptimalMemOpType - Returns the target specific optimal type for load -/// and store operations as a result of memset, memcpy, and memmove -/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for -/// determining it. +/// and store operations as a result of memset, memcpy, and memmove lowering. +/// If DstAlign is zero that means it's safe to destination alignment can +/// satisfy any constraint. Similarly if SrcAlign is zero it means there +/// isn't a need to check it against alignment requirement, probably because +/// the source does not need to be loaded. It returns EVT::Other if +/// SelectionDAG should be responsible for determining it. EVT -X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr, +X86TargetLowering::getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool SafeToUseFP, SelectionDAG &DAG) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. 
const Function *F = DAG.getMachineFunction().getFunction(); - bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); - if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) { - if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16) - return MVT::v4i32; - if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16) - return MVT::v4f32; + if (!F->hasFnAttr(Attribute::NoImplicitFloat)) { + if (Size >= 16 && + (Subtarget->isUnalignedMemAccessFast() || + ((DstAlign == 0 || DstAlign >= 16) && + (SrcAlign == 0 || SrcAlign >= 16))) && + Subtarget->getStackAlignment() >= 16) { + if (Subtarget->hasSSE2()) + return MVT::v4i32; + if (SafeToUseFP && Subtarget->hasSSE1()) + return MVT::v4f32; + } else if (SafeToUseFP && + Size >= 8 && + !Subtarget->is64Bit() && + Subtarget->getStackAlignment() >= 8 && + Subtarget->hasSSE2()) + return MVT::f64; } if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; @@ -1108,8 +1123,8 @@ MCSymbol * X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const { const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo(); - return Ctx.GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix())+ - Twine(MF->getFunctionNumber())+"$pb"); + return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + Twine(MF->getFunctionNumber())+"$pb"); } @@ -2290,6 +2305,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; // If -tailcallopt is specified, make fastcc functions tail-callable. + const MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = DAG.getMachineFunction().getFunction(); if (GuaranteedTailCallOpt) { if (IsTailCallConvention(CalleeCC) && @@ -2301,8 +2317,14 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Look for obvious safe cases to perform tail call optimization that does not // requite ABI changes. This is what gcc calls sibcall. - // Do not sibcall optimize vararg calls for now. 
- if (isVarArg) + // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to + // emit a special epilogue. + if (RegInfo->needsStackRealignment(MF)) + return false; + + // Do not sibcall optimize vararg calls unless the call site is not passing any + // arguments. + if (isVarArg && !Outs.empty()) return false; // Also avoid sibcall optimization if either caller or callee uses struct @@ -2417,7 +2439,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement) { // Offset should fit into 32 bit immediate field. - if (!isInt32(Offset)) + if (!isInt<32>(Offset)) return false; // If we don't have a symbolic displacement - we don't have any extra @@ -3613,6 +3635,69 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, return SDValue(); } +/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a +/// vector of type 'VT', see if the elements can be replaced by a single large +/// load which has the same value as a build_vector whose operands are 'elts'. +/// +/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a +/// +/// FIXME: we'd also like to handle the case where the last elements are zero +/// rather than undef via VZEXT_LOAD, but we do not detect that case today. +/// There's even a handy isZeroNode for that purpose. +static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, + DebugLoc &dl, SelectionDAG &DAG) { + EVT EltVT = VT.getVectorElementType(); + unsigned NumElems = Elts.size(); + + LoadSDNode *LDBase = NULL; + unsigned LastLoadedElt = -1U; + + // For each element in the initializer, see if we've found a load or an undef. + // If we don't find an initial load element, or later load elements are + // non-consecutive, bail out. 
+ for (unsigned i = 0; i < NumElems; ++i) { + SDValue Elt = Elts[i]; + + if (!Elt.getNode() || + (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) + return SDValue(); + if (!LDBase) { + if (Elt.getNode()->getOpcode() == ISD::UNDEF) + return SDValue(); + LDBase = cast<LoadSDNode>(Elt.getNode()); + LastLoadedElt = i; + continue; + } + if (Elt.getOpcode() == ISD::UNDEF) + continue; + + LoadSDNode *LD = cast<LoadSDNode>(Elt); + if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i)) + return SDValue(); + LastLoadedElt = i; + } + + // If we have found an entire vector of loads and undefs, then return a large + // load of the entire vector width starting at the base pointer. If we found + // consecutive loads for the low half, generate a vzext_load node. + if (LastLoadedElt == NumElems - 1) { + if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16) + return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(), + LDBase->getSrcValue(), LDBase->getSrcValueOffset(), + LDBase->isVolatile(), LDBase->isNonTemporal(), 0); + return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(), + LDBase->getSrcValue(), LDBase->getSrcValueOffset(), + LDBase->isVolatile(), LDBase->isNonTemporal(), + LDBase->getAlignment()); + } else if (NumElems == 4 && LastLoadedElt == 1) { + SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); + SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; + SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); + } + return SDValue(); +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); @@ -3841,14 +3926,18 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } - if (Values.size() > 2) { - // If we have SSE 4.1, Expand into a number of inserts unless the number of - // values to be inserted is 
equal to the number of elements, in which case - // use the unpack code below in the hopes of matching the consecutive elts - // load merge pattern for shuffles. - // FIXME: We could probably just check that here directly. - if (Values.size() < NumElems && VT.getSizeInBits() == 128 && - getSubtarget()->hasSSE41()) { + if (Values.size() > 1 && VT.getSizeInBits() == 128) { + // Check for a build vector of consecutive loads. + for (unsigned i = 0; i < NumElems; ++i) + V[i] = Op.getOperand(i); + + // Check for elements which are consecutive loads. + SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG); + if (LD.getNode()) + return LD; + + // For SSE 4.1, use inserts into undef. + if (getSubtarget()->hasSSE41()) { V[0] = DAG.getUNDEF(VT); for (unsigned i = 0; i < NumElems; ++i) if (Op.getOperand(i).getOpcode() != ISD::UNDEF) @@ -3856,7 +3945,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { Op.getOperand(i), DAG.getIntPtrConstant(i)); return V[0]; } - // Expand into a number of unpckl*. + + // Otherwise, expand into a number of unpckl* // e.g. 
for v4f32 // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> @@ -3871,7 +3961,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } return V[0]; } - return SDValue(); } @@ -8797,83 +8886,24 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, return TargetLowering::isGAPlusOffset(N, GA, Offset); } -static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, - EVT EltVT, LoadSDNode *&LDBase, - unsigned &LastLoadedElt, - SelectionDAG &DAG, MachineFrameInfo *MFI, - const TargetLowering &TLI) { - LDBase = NULL; - LastLoadedElt = -1U; - for (unsigned i = 0; i < NumElems; ++i) { - if (N->getMaskElt(i) < 0) { - if (!LDBase) - return false; - continue; - } - - SDValue Elt = DAG.getShuffleScalarElt(N, i); - if (!Elt.getNode() || - (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode()))) - return false; - if (!LDBase) { - if (Elt.getNode()->getOpcode() == ISD::UNDEF) - return false; - LDBase = cast<LoadSDNode>(Elt.getNode()); - LastLoadedElt = i; - continue; - } - if (Elt.getOpcode() == ISD::UNDEF) - continue; - - LoadSDNode *LD = cast<LoadSDNode>(Elt); - if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i)) - return false; - LastLoadedElt = i; - } - return true; -} - /// PerformShuffleCombine - Combine a vector_shuffle that is equal to /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load /// if the load addresses are consecutive, non-overlapping, and in the right -/// order. In the case of v2i64, it will see if it can rewrite the -/// shuffle to be an appropriate build vector so it can take advantage of -// performBuildVectorCombine. +/// order. 
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); - unsigned NumElems = VT.getVectorNumElements(); if (VT.getSizeInBits() != 128) return SDValue(); - // Try to combine a vector_shuffle into a 128-bit load. - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - LoadSDNode *LD = NULL; - unsigned LastLoadedElt; - if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG, - MFI, TLI)) - return SDValue(); - - if (LastLoadedElt == NumElems - 1) { - if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) - return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->isNonTemporal(), 0); - return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - } else if (NumElems == 4 && LastLoadedElt == 1) { - SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); - SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; - SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); - } - return SDValue(); + SmallVector<SDValue, 16> Elts; + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) + Elts.push_back(DAG.getShuffleScalarElt(SVN, i)); + + return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); } /// PerformShuffleCombine - Detect vector gather/scatter index generation diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0f15eba..4549cba 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -417,12 +417,15 @@ namespace llvm { virtual unsigned getByValTypeAlignment(const Type *Ty) const; /// getOptimalMemOpType - Returns the target 
specific optimal type for load - /// and store operations as a result of memset, memcpy, and memmove - /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for - /// determining it. - virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr, - SelectionDAG &DAG) const; + /// and store operations as a result of memset, memcpy, and memmove lowering. + /// If DstAlign is zero that means it's safe to destination alignment can + /// satisfy any constraint. Similarly if SrcAlign is zero it means there + /// isn't a need to check it against alignment requirement, probably because + /// the source does not need to be loaded. It returns EVT::Other if + /// SelectionDAG should be responsible for determining it. + virtual EVT getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool SafeToUseFP, SelectionDAG &DAG) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. 
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 8cbb756..eef2ca0 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -295,19 +295,17 @@ def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), let Defs = [EFLAGS] in { def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (X86bsf GR64:$src)), (implicit EFLAGS)]>, TB; + [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB; def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (X86bsf (loadi64 addr:$src))), - (implicit EFLAGS)]>, TB; + [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB; def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (X86bsr GR64:$src)), (implicit EFLAGS)]>, TB; + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB; def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (X86bsr (loadi64 addr:$src))), - (implicit EFLAGS)]>, TB; + [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB; } // Defs = [EFLAGS] // Repeat string ops @@ -508,8 +506,8 @@ let isCommutable = 1 in def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (add GR64:$src1, GR64:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86add_flag GR64:$src1, GR64:$src2))]>; // These are alternate spellings for use by the disassembler, we mark them as // code gen only to ensure they aren't matched by the assembler. 
@@ -523,21 +521,21 @@ let isCodeGenOnly = 1 in { def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86add_flag GR64:$src1, i64immSExt8:$src2))]>; def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86add_flag GR64:$src1, i64immSExt32:$src2))]>; } // isConvertibleToThreeAddress // Register-Memory Addition def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86add_flag GR64:$src1, (load addr:$src2)))]>; } // isTwoAddress @@ -604,8 +602,8 @@ let isTwoAddress = 1 in { def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86sub_flag GR64:$src1, GR64:$src2))]>; def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -615,20 +613,20 @@ def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86sub_flag GR64:$src1, (load addr:$src2)))]>; // Register-Integer Subtraction def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86sub_flag 
GR64:$src1, i64immSExt8:$src2))]>; def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src), @@ -716,15 +714,15 @@ let isCommutable = 1 in def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)), - (implicit EFLAGS)]>, TB; + [(set GR64:$dst, EFLAGS, + (X86smul_flag GR64:$src1, GR64:$src2))]>, TB; // Register-Memory Signed Integer Multiplication def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))), - (implicit EFLAGS)]>, TB; + [(set GR64:$dst, EFLAGS, + (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB; } // isTwoAddress // Suprisingly enough, these are not two address instructions! 
@@ -733,27 +731,27 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>; def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>; // Memory-Integer Signed Integer Multiplication def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, (mul (load addr:$src1), - i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), + i64immSExt8:$src2))]>; def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, (mul (load addr:$src1), - i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), + i64immSExt32:$src2))]>; } // Defs = [EFLAGS] // Unsigned division / remainder @@ -787,16 +785,14 @@ def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst", - [(set GR64:$dst, (add GR64:$src, 1)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>; def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), (implicit 
EFLAGS)]>; let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst", - [(set GR64:$dst, (add GR64:$src, -1)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>; def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)]>; @@ -806,23 +802,19 @@ let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in { // Can transform into LEA. def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst", - [(set GR16:$dst, (add GR16:$src, 1)), - (implicit EFLAGS)]>, + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>, OpSize, Requires<[In64BitMode]>; def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst", - [(set GR32:$dst, (add GR32:$src, 1)), - (implicit EFLAGS)]>, + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>, Requires<[In64BitMode]>; def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst", - [(set GR16:$dst, (add GR16:$src, -1)), - (implicit EFLAGS)]>, + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>, OpSize, Requires<[In64BitMode]>; def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst", - [(set GR32:$dst, (add GR32:$src, -1)), - (implicit EFLAGS)]>, + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>, Requires<[In64BitMode]>; } // isConvertibleToThreeAddress @@ -1092,26 +1084,26 @@ let isCommutable = 1 in def AND64rr : RI<0x21, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, GR64:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86and_flag GR64:$src1, GR64:$src2))]>; def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", []>; def AND64rm : RI<0x23, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), 
"and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86and_flag GR64:$src1, (load addr:$src2)))]>; def AND64ri8 : RIi8<0x83, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86and_flag GR64:$src1, i64immSExt8:$src2))]>; def AND64ri32 : RIi32<0x81, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86and_flag GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress def AND64mr : RI<0x21, MRMDestMem, @@ -1135,26 +1127,26 @@ let isCommutable = 1 in def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, GR64:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86or_flag GR64:$src1, GR64:$src2))]>; def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", []>; def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86or_flag GR64:$src1, (load addr:$src2)))]>; def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86or_flag GR64:$src1, i64immSExt8:$src2))]>; def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + 
[(set GR64:$dst, EFLAGS, + (X86or_flag GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), @@ -1178,26 +1170,26 @@ let isCommutable = 1 in def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (xor GR64:$src1, GR64:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86xor_flag GR64:$src1, GR64:$src2))]>; def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", []>; def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86xor_flag GR64:$src1, (load addr:$src2)))]>; def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>; def XOR64ri32 : RIi32<0x81, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2)), - (implicit EFLAGS)]>; + [(set GR64:$dst, EFLAGS, + (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), @@ -2181,14 +2173,11 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. 
let AddedComplexity = 5 in { // Try this before the selecting to OR -def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2), (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; } // AddedComplexity @@ -2215,136 +2204,76 @@ def : Pat<(subc GR64:$src1, imm:$src2), // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// -// Register-Register Addition with EFLAGS result -def : Pat<(parallel (X86add_flag GR64:$src1, GR64:$src2), - (implicit EFLAGS)), +// addition +def : Pat<(add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; - -// Register-Integer Addition with EFLAGS result -def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(add GR64:$src1, i64immSExt8:$src2), (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)), +def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// Register-Memory Addition with EFLAGS result -def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), (ADD64rm GR64:$src1, addr:$src2)>; -// Register-Register Subtraction with EFLAGS result -def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2), - (implicit EFLAGS)), +// subtraction +def : Pat<(sub GR64:$src1, GR64:$src2), (SUB64rr GR64:$src1, GR64:$src2)>; - -// Register-Memory Subtraction with EFLAGS result -def : Pat<(parallel (X86sub_flag GR64:$src1, (loadi64 
addr:$src2)), - (implicit EFLAGS)), +def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), (SUB64rm GR64:$src1, addr:$src2)>; - -// Register-Integer Subtraction with EFLAGS result -def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(sub GR64:$src1, i64immSExt8:$src2), (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)), +def : Pat<(sub GR64:$src1, i64immSExt32:$src2), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; -// Register-Register Signed Integer Multiplication with EFLAGS result -def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2), - (implicit EFLAGS)), +// Multiply +def : Pat<(mul GR64:$src1, GR64:$src2), (IMUL64rr GR64:$src1, GR64:$src2)>; - -// Register-Memory Signed Integer Multiplication with EFLAGS result -def : Pat<(parallel (X86smul_flag GR64:$src1, (loadi64 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), (IMUL64rm GR64:$src1, addr:$src2)>; - -// Register-Integer Signed Integer Multiplication with EFLAGS result -def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(mul GR64:$src1, i64immSExt8:$src2), (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)), +def : Pat<(mul GR64:$src1, i64immSExt32:$src2), (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; - -// Memory-Integer Signed Integer Multiplication with EFLAGS result -def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2), (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2), - (implicit EFLAGS)), +def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; -// INC and DEC with EFLAGS result. 
Note that these do not set CF. -def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), - (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), - (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; - -def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), - (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), - (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; - -def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)), - (INC64r GR64:$src)>; -def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)), - (DEC64r GR64:$src)>; - -// Register-Register Logical Or with EFLAGS result -def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2), - (implicit EFLAGS)), - (OR64rr GR64:$src1, GR64:$src2)>; +// inc/dec +def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; +def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; -// Register-Integer Logical Or with EFLAGS result -def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)), +// or +def : Pat<(or GR64:$src1, GR64:$src2), + (OR64rr GR64:$src1, GR64:$src2)>; +def : Pat<(or GR64:$src1, i64immSExt8:$src2), (OR64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)), +def : Pat<(or GR64:$src1, i64immSExt32:$src2), (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// Register-Memory Logical Or with EFLAGS result -def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), (OR64rm GR64:$src1, 
addr:$src2)>; -// Register-Register Logical XOr with EFLAGS result -def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2), - (implicit EFLAGS)), +// xor +def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>; - -// Register-Integer Logical XOr with EFLAGS result -def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(xor GR64:$src1, i64immSExt8:$src2), (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)), +def : Pat<(xor GR64:$src1, i64immSExt32:$src2), (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// Register-Memory Logical XOr with EFLAGS result -def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), (XOR64rm GR64:$src1, addr:$src2)>; -// Register-Register Logical And with EFLAGS result -def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2), - (implicit EFLAGS)), +// and +def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>; - -// Register-Integer Logical And with EFLAGS result -def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(and GR64:$src1, i64immSExt8:$src2), (AND64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)), +def : Pat<(and GR64:$src1, i64immSExt32:$src2), (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// Register-Memory Logical And with EFLAGS result -def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), (AND64rm GR64:$src1, addr:$src2)>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index bb81cbf..d25ec26 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ 
b/lib/Target/X86/X86InstrFormats.td @@ -68,6 +68,16 @@ def CompareFP : FPFormat<5>; def CondMovFP : FPFormat<6>; def SpecialFP : FPFormat<7>; +// Class specifying the SSE execution domain, used by the SSEDomainFix pass. +// Keep in sync with tables in X86InstrInfo.cpp. +class Domain<bits<2> val> { + bits<2> Value = val; +} +def GenericDomain : Domain<0>; +def SSEPackedSingle : Domain<1>; +def SSEPackedDouble : Domain<2>; +def SSEPackedInt : Domain<3>; + // Prefix byte classes which are used to indicate to the ad-hoc machine code // emitter that various prefix bytes are required. class OpSize { bit hasOpSizePrefix = 1; } @@ -93,7 +103,7 @@ class TA { bits<4> Prefix = 14; } class TF { bits<4> Prefix = 15; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, - string AsmStr> + string AsmStr, Domain d = GenericDomain> : Instruction { let Namespace = "X86"; @@ -101,7 +111,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, Format Form = f; bits<6> FormBits = Form.Value; ImmType ImmT = i; - bits<3> ImmTypeBits = ImmT.Value; dag OutOperandList = outs; dag InOperandList = ins; @@ -115,20 +124,21 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bits<4> Prefix = 0; // Which prefix byte does this inst have? bit hasREX_WPrefix = 0; // Does this inst requires the REX.W prefix? - FPFormat FPForm; // What flavor of FP instruction is this? - bits<3> FPFormBits = 0; + FPFormat FPForm = NotFP; // What flavor of FP instruction is this? bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? bits<2> SegOvrBits = 0; // Segment override prefix. 
+ Domain ExeDomain = d; } -class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> - : X86Inst<o, f, NoImm, outs, ins, asm> { +class I<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern, Domain d = GenericDomain> + : X86Inst<o, f, NoImm, outs, ins, asm, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm8 , outs, ins, asm> { + list<dag> pattern, Domain d = GenericDomain> + : X86Inst<o, f, Imm8, outs, ins, asm, d> { let Pattern = pattern; let CodeSize = 3; } @@ -166,7 +176,7 @@ class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> // FpI_ - Floating Point Pseudo Instruction template. Not Predicated. class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> : X86Inst<0, Pseudo, NoImm, outs, ins, ""> { - let FPForm = fp; let FPFormBits = FPForm.Value; + let FPForm = fp; let Pattern = pattern; } @@ -196,14 +206,16 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; -class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, +class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + Requires<[HasSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + Requires<[HasSSE1]>; // SSE2 Instruction Templates: // @@ -222,10 +234,12 @@ class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> 
pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, + Requires<[HasSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, + Requires<[HasSSE2]>; // SSE3 Instruction Templates: // @@ -235,12 +249,15 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS, + Requires<[HasSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD, + Requires<[HasSSE3]>; class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, + Requires<[HasSSE3]>; // SSSE3 Instruction Templates: @@ -254,10 +271,12 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + Requires<[HasSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + Requires<[HasSSSE3]>; // SSE4.1 Instruction 
Templates: // @@ -266,17 +285,20 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, // class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + Requires<[HasSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + Requires<[HasSSE41]>; // SSE4.2 Instruction Templates: // // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>; + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + Requires<[HasSSE42]>; // SS42FI - SSE 4.2 instructions with TF prefix. class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, @@ -286,7 +308,8 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>; + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + Requires<[HasSSE42]>; // X86-64 Instruction templates... 
// diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 139a905..c0c9d98 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -597,7 +597,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, { X86::PMULHWrr, X86::PMULHWrm, 16 }, { X86::PMULLDrr, X86::PMULLDrm, 16 }, - { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 }, { X86::PMULLWrr, X86::PMULLWrm, 16 }, { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, { X86::PORrr, X86::PORrm, 16 }, @@ -992,8 +991,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, /// a few instructions in each direction it assumes it's not safe. static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { + MachineBasicBlock::iterator E = MBB.end(); + // It's always safe to clobber EFLAGS at the end of a block. - if (I == MBB.end()) + if (I == E) return true; // For compile time consideration, if we are not able to determine the @@ -1017,20 +1018,28 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, // This instruction defines EFLAGS, no need to look any further. return true; ++Iter; + // Skip over DBG_VALUE. + while (Iter != E && Iter->isDebugValue()) + ++Iter; // If we make it to the end of the block, it's safe to clobber EFLAGS. - if (Iter == MBB.end()) + if (Iter == E) return true; } + MachineBasicBlock::iterator B = MBB.begin(); Iter = I; for (unsigned i = 0; i < 4; ++i) { // If we make it to the beginning of the block, it's safe to clobber // EFLAGS iff EFLAGS is not live-in. - if (Iter == MBB.begin()) + if (Iter == B) return !MBB.isLiveIn(X86::EFLAGS); --Iter; + // Skip over DBG_VALUE. 
+ while (Iter != B && Iter->isDebugValue()) + --Iter; + bool SawKill = false; for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { MachineOperand &MO = Iter->getOperand(j); @@ -1677,6 +1686,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; + if (I->isDebugValue()) + continue; // Working from the bottom, when we see a non-terminator instruction, we're // done. @@ -1773,6 +1784,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; + if (I->isDebugValue()) + continue; if (I->getOpcode() != X86::JMP_4 && GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; @@ -2505,7 +2518,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = (*LoadMI->memoperands_begin())->getAlignment(); else switch (LoadMI->getOpcode()) { - case X86::V_SET0: + case X86::V_SET0PS: + case X86::V_SET0PD: + case X86::V_SET0PI: case X86::V_SETALLONES: Alignment = 16; break; @@ -2535,11 +2550,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, SmallVector<MachineOperand,X86AddrNumOperands> MOs; switch (LoadMI->getOpcode()) { - case X86::V_SET0: + case X86::V_SET0PS: + case X86::V_SET0PD: + case X86::V_SET0PI: case X86::V_SETALLONES: case X86::FsFLD0SD: case X86::FsFLD0SS: { - // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. + // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. // Medium and large mode can't fold loads this way. @@ -3648,3 +3665,51 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { X86FI->setGlobalBaseReg(GlobalBaseReg); return GlobalBaseReg; } + +// These are the replaceable SSE instructions. Some of these have Int variants +// that we don't include here. We don't want to replace instructions selected +// by intrinsics. 
+static const unsigned ReplaceableInstrs[][3] = { + //PackedSingle PackedDouble PackedInt + { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, + { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, + { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, + { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, + { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, + { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, + { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, + { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, + { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm }, + { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr }, + { X86::ORPSrm, X86::ORPDrm, X86::PORrm }, + { X86::ORPSrr, X86::ORPDrr, X86::PORrr }, + { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, + { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, + { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, +}; + +// FIXME: Some shuffle and unpack instructions have equivalents in different +// domains, but they require a bit more work than just switching opcodes. + +static const unsigned *lookup(unsigned opcode, unsigned domain) { + for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) + if (ReplaceableInstrs[i][domain-1] == opcode) + return ReplaceableInstrs[i]; + return 0; +} + +std::pair<uint16_t, uint16_t> +X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const { + uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; + return std::make_pair(domain, + domain && lookup(MI->getOpcode(), domain) ? 
0xe : 0); +} + +void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { + assert(Domain>0 && Domain<4 && "Invalid execution domain"); + uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; + assert(dom && "Not an SSE instruction"); + const unsigned *table = lookup(MI->getOpcode(), dom); + assert(table && "Cannot change domain"); + MI->setDesc(get(table[Domain-1])); +} diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 5111719..f0bdd06 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -398,7 +398,10 @@ namespace X86II { FS = 1 << SegOvrShift, GS = 2 << SegOvrShift, - // Bits 22 -> 23 are unused + // Execution domain for SSE instructions in bits 22, 23. + // 0 in bits 22-23 means normal, non-SSE instruction. + SSEDomainShift = 22, + OpcodeShift = 24, OpcodeMask = 0xFF << OpcodeShift }; @@ -486,7 +489,7 @@ class X86InstrInfo : public TargetInstrInfoImpl { /// MemOp2RegOpTable - Load / store unfolding opcode map. /// DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable; - + public: explicit X86InstrInfo(X86TargetMachine &tm); @@ -716,6 +719,13 @@ public: /// unsigned getGlobalBaseReg(MachineFunction *MF) const; + /// GetSSEDomain - Return the SSE execution domain of MI as the first element, + /// and a bitmask of possible arguments to SetSSEDomain as the second. + std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const; + + /// SetSSEDomain - Set the SSEDomain of MI. 
+ void SetSSEDomain(MachineInstr *MI, unsigned Domain) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index c80a18d..8fccc8a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1,4 +1,4 @@ - +//===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -28,12 +28,13 @@ def SDTX86Cmov : SDTypeProfile<1, 4, SDTCisVT<3, i8>, SDTCisVT<4, i32>]>; // Unary and binary operator instructions that set EFLAGS as a side-effect. -def SDTUnaryArithWithFlags : SDTypeProfile<1, 1, - [SDTCisInt<0>]>; -def SDTBinaryArithWithFlags : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<0>]>; +def SDTUnaryArithWithFlags : SDTypeProfile<2, 1, + [SDTCisInt<0>, SDTCisVT<1, i32>]>; + +def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; def SDTX86BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; @@ -77,8 +78,8 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; -def X86bsf : SDNode<"X86ISD::BSF", SDTIntUnaryOp>; -def X86bsr : SDNode<"X86ISD::BSR", SDTIntUnaryOp>; +def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>; +def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>; def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>; def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>; @@ -167,6 +168,7 @@ def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags, [SDNPCommutative]>; + def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; def X86or_flag : 
SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, @@ -323,6 +325,7 @@ def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&" def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" "TM.getCodeModel() == CodeModel::Kernel">; def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; +def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">; def OptForSize : Predicate<"OptForSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; @@ -473,15 +476,14 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - else { - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); - APInt KnownZero0, KnownOne0; - CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); - APInt KnownZero1, KnownOne1; - CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); - return (~KnownZero0 & ~KnownZero1) == 0; - } + + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt KnownZero0, KnownOne0; + CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); + APInt KnownZero1, KnownOne1; + CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); + return (~KnownZero0 & ~KnownZero1) == 0; }]>; // 'shld' and 'shrd' instruction patterns. Note that even though these have @@ -585,7 +587,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in // Return instructions. 
let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, FPForm = SpecialFP, FPFormBits = SpecialFP.Value in { + hasCtrlDep = 1, FPForm = SpecialFP in { def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", [(X86retflag 0)]>; @@ -806,33 +808,29 @@ let isTwoAddress = 1 in // GR32 = bswap GR32 let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (X86bsf (loadi16 addr:$src))), - (implicit EFLAGS)]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB; + [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB; def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (X86bsf (loadi32 addr:$src))), - (implicit EFLAGS)]>, TB; + [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (X86bsr (loadi16 addr:$src))), - (implicit EFLAGS)]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB; + [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB; def BSR32rm : I<0xBD, MRMSrcMem, 
(outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (X86bsr (loadi32 addr:$src))), - (implicit EFLAGS)]>, TB; + [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB; } // Defs = [EFLAGS] let neverHasSideEffects = 1 in @@ -1697,18 +1695,17 @@ let isTwoAddress = 0 in { let Defs = [EFLAGS] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst", - [(set GR8:$dst, (add GR8:$src, 1)), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>; + let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst", - [(set GR16:$dst, (add GR16:$src, 1)), - (implicit EFLAGS)]>, + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>, OpSize, Requires<[In32BitMode]>; def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst", - [(set GR32:$dst, (add GR32:$src, 1)), - (implicit EFLAGS)]>, Requires<[In32BitMode]>; + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>, + Requires<[In32BitMode]>; } let isTwoAddress = 0, CodeSize = 2 in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", @@ -1726,18 +1723,16 @@ let isTwoAddress = 0, CodeSize = 2 in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst", - [(set GR8:$dst, (add GR8:$src, -1)), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst", - [(set GR16:$dst, (add GR16:$src, -1)), - (implicit EFLAGS)]>, + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>, OpSize, Requires<[In32BitMode]>; def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst", - [(set GR32:$dst, (add GR32:$src, -1)), - (implicit EFLAGS)]>, Requires<[In32BitMode]>; + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>, + Requires<[In32BitMode]>; } let isTwoAddress = 0, CodeSize = 2 in { @@ -1758,21 +1753,20 @@ let isTwoAddress = 0, CodeSize = 2 in { // Logical operators... let Defs = [EFLAGS] in { let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y -def AND8rr : I<0x20, MRMDestReg, - (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (and GR8:$src1, GR8:$src2)), - (implicit EFLAGS)]>; -def AND16rr : I<0x21, MRMDestReg, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (and GR16:$src1, GR16:$src2)), - (implicit EFLAGS)]>, OpSize; -def AND32rr : I<0x21, MRMDestReg, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, GR32:$src2)), - (implicit EFLAGS)]>; +def AND8rr : I<0x20, MRMDestReg, + (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), + "and{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>; +def AND16rr : I<0x21, MRMDestReg, + (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "and{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, + GR16:$src2))]>, OpSize; +def AND32rr : I<0x21, MRMDestReg, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "and{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, + GR32:$src2))]>; } // AND instructions with the destination register in REG and the source register @@ -1789,45 +1783,46 @@ def 
AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), def AND8rm : I<0x22, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (and GR8:$src1, (loadi8 addr:$src2))), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, + (loadi8 addr:$src2)))]>; def AND16rm : I<0x23, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (and GR16:$src1, (loadi16 addr:$src2))), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, + (loadi16 addr:$src2)))]>, + OpSize; def AND32rm : I<0x23, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, (loadi32 addr:$src2))), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, + (loadi32 addr:$src2)))]>; def AND8ri : Ii8<0x80, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2), "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (and GR8:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, + imm:$src2))]>; def AND16ri : Ii16<0x81, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (and GR16:$src1, imm:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, + imm:$src2))]>, OpSize; def AND32ri : Ii32<0x81, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, + imm:$src2))]>; def AND16ri8 : Ii8<0x83, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (and GR16:$src1, i16immSExt8:$src2)), - (implicit EFLAGS)]>, + [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, + i16immSExt8:$src2))]>, OpSize; def AND32ri8 
: Ii8<0x83, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, i32immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, + i32immSExt8:$src2))]>; let isTwoAddress = 0 in { def AND8mr : I<0x20, MRMDestMem, @@ -1888,18 +1883,16 @@ let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (or GR8:$src1, GR8:$src2)), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>; def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (or GR16:$src1, GR16:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>, + OpSize; def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, GR32:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>; } // OR instructions with the destination register in REG and the source register @@ -1913,48 +1906,48 @@ def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", []>; -def OR8rm : I<0x0A, MRMSrcMem , (outs GR8 :$dst), +def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (or GR8:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; -def OR16rm : I<0x0B, MRMSrcMem , (outs GR16:$dst), + [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, + (load addr:$src2)))]>; +def OR16rm : I<0x0B, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (or GR16:$src1, (load addr:$src2))), - (implicit EFLAGS)]>, OpSize; -def 
OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst), + [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, + (load addr:$src2)))]>, + OpSize; +def OR32rm : I<0x0B, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, + (load addr:$src2)))]>; def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (or GR8:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>; def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (or GR16:$src1, imm:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, + imm:$src2))]>, OpSize; def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, + imm:$src2))]>; def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, + i16immSExt8:$src2))]>, OpSize; def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, + i32immSExt8:$src2))]>; let isTwoAddress = 0 in { def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "or{b}\t{$src, $dst|$dst, $src}", @@ -2004,18 +1997,18 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y def XOR8rr : I<0x30, MRMDestReg, (outs GR8 
:$dst), (ins GR8 :$src1, GR8 :$src2), "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, + GR8:$src2))]>; def XOR16rr : I<0x31, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, + GR16:$src2))]>, OpSize; def XOR32rr : I<0x31, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (xor GR32:$src1, GR32:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, + GR32:$src2))]>; } // isCommutable = 1 // XOR instructions with the destination register in REG and the source register @@ -2029,49 +2022,48 @@ def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "xor{l}\t{$src2, $dst|$dst, $src2}", []>; -def XOR8rm : I<0x32, MRMSrcMem , +def XOR8rm : I<0x32, MRMSrcMem, (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; -def XOR16rm : I<0x33, MRMSrcMem , + [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, + (load addr:$src2)))]>; +def XOR16rm : I<0x33, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))), - (implicit EFLAGS)]>, + [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, + (load addr:$src2)))]>, OpSize; -def XOR32rm : I<0x33, MRMSrcMem , +def XOR32rm : I<0x33, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (xor GR32:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; - -def XOR8ri : Ii8<0x80, MRM6r, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set 
GR8:$dst, (xor GR8:$src1, imm:$src2)), - (implicit EFLAGS)]>; -def XOR16ri : Ii16<0x81, MRM6r, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (xor GR16:$src1, imm:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, + (load addr:$src2)))]>; + +def XOR8ri : Ii8<0x80, MRM6r, + (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "xor{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>; +def XOR16ri : Ii16<0x81, MRM6r, + (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "xor{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, + imm:$src2))]>, OpSize; def XOR32ri : Ii32<0x81, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (xor GR32:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, + imm:$src2))]>; def XOR16ri8 : Ii8<0x83, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (xor GR16:$src1, i16immSExt8:$src2)), - (implicit EFLAGS)]>, + [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, + i16immSExt8:$src2))]>, OpSize; def XOR32ri8 : Ii8<0x83, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, + i32immSExt8:$src2))]>; let isTwoAddress = 0 in { def XOR8mr : I<0x30, MRMDestMem, @@ -2118,12 +2110,12 @@ let isTwoAddress = 0 in { [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst), (implicit EFLAGS)]>; - def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src), - "xor{b}\t{$src, %al|%al, $src}", []>; - def XOR16i16 : Ii16 <0x35, RawFrm, (outs), (ins i16imm:$src), - "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def XOR32i32 : Ii32 <0x35, RawFrm, 
(outs), (ins i32imm:$src), - "xor{l}\t{$src, %eax|%eax, $src}", []>; + def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src), + "xor{b}\t{$src, %al|%al, $src}", []>; + def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src), + "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src), + "xor{l}\t{$src, %eax|%eax, $src}", []>; } // isTwoAddress = 0 } // Defs = [EFLAGS] @@ -2690,21 +2682,20 @@ let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), "add{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (add GR8:$src1, GR8:$src2)), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>; let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. // Register-Register Addition def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "add{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (add GR16:$src1, GR16:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1, + GR16:$src2))]>, OpSize; def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, GR32:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1, + GR32:$src2))]>; } // end isConvertibleToThreeAddress } // end isCommutable @@ -2723,47 +2714,47 @@ let isCodeGenOnly = 1 in { def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), "add{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, + (load addr:$src2)))]>; def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "add{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))), - (implicit EFLAGS)]>, OpSize; + [(set 
GR16:$dst, EFLAGS, (X86add_flag GR16:$src1, + (load addr:$src2)))]>, OpSize; def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1, + (load addr:$src2)))]>; // Register-Integer Addition def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "add{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (add GR8:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, + (X86add_flag GR8:$src1, imm:$src2))]>; let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. // Register-Integer Addition def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "add{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (add GR16:$src1, imm:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86add_flag GR16:$src1, imm:$src2))]>, OpSize; def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86add_flag GR32:$src1, imm:$src2))]>; def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "add{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize; def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86add_flag GR32:$src1, i32immSExt8:$src2))]>; } let isTwoAddress = 0 in { @@ -2911,16 +2902,16 @@ let isTwoAddress = 0 in { // Register-Register Subtraction def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), 
(ins GR8:$src1, GR8:$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, + (X86sub_flag GR8:$src1, GR8:$src2))]>; def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize; def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86sub_flag GR32:$src1, GR32:$src2))]>; def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", []>; @@ -2935,45 +2926,45 @@ def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR8:$dst, EFLAGS, + (X86sub_flag GR8:$src1, (load addr:$src2)))]>; def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize; def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86sub_flag GR32:$src1, (load addr:$src2)))]>; // Register-Integer Subtraction def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sub GR8:$src1, imm:$src2)), - (implicit 
EFLAGS)]>; + [(set GR8:$dst, EFLAGS, + (X86sub_flag GR8:$src1, imm:$src2))]>; def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sub GR16:$src1, imm:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize; def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86sub_flag GR32:$src1, imm:$src2))]>; def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize; def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>; let isTwoAddress = 0 in { // Memory-Register Subtraction @@ -3122,25 +3113,26 @@ let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)), - (implicit EFLAGS)]>, TB, OpSize; + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize; def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)), - (implicit EFLAGS)]>, TB; + [(set GR32:$dst, EFLAGS, + (X86smul_flag GR32:$src1, GR32:$src2))]>, TB; } // Register-Memory Signed Integer Multiply def IMUL16rm : I<0xAF, 
MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))), - (implicit EFLAGS)]>, TB, OpSize; + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, (load addr:$src2)))]>, + TB, OpSize; def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))), - (implicit EFLAGS)]>, TB; + [(set GR32:$dst, EFLAGS, + (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB; } // Defs = [EFLAGS] } // end Two Address instructions @@ -3150,47 +3142,49 @@ let Defs = [EFLAGS] in { def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, (mul GR16:$src1, imm:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize; def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (mul GR32:$src1, imm:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86smul_flag GR32:$src1, imm:$src2))]>; def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>, + OpSize; def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>; // Memory-Integer Signed Integer Multiply def 
IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), imm:$src2))]>, + OpSize; def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), imm:$src2))]>; def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, (mul (load addr:$src1), - i16immSExt8:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set GR16:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), + i16immSExt8:$src2))]>, OpSize; def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (mul (load addr:$src1), - i32immSExt8:$src2)), - (implicit EFLAGS)]>; + [(set GR32:$dst, EFLAGS, + (X86smul_flag (load addr:$src1), + i32immSExt8:$src2))]>; } // Defs = [EFLAGS] //===----------------------------------------------------------------------===// @@ -4345,9 +4339,12 @@ def : Pat<(X86tcret GR32_TC:$dst, imm:$off), (TCRETURNri GR32_TC:$dst, imm:$off)>, Requires<[In32BitMode]>; +// FIXME: This is disabled for 32-bit PIC mode because the global base +// register which is part of the address mode may be assigned a +// callee-saved register. 
def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[In32BitMode]>; + Requires<[In32BitMode, IsNotPIC]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>, @@ -4722,23 +4719,17 @@ def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR -def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2), (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2), - (implicit EFLAGS)), +def : Pat<(or_is_add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; } // AddedComplexity @@ -4746,270 +4737,173 @@ def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2), // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// -// Register-Register Addition with EFLAGS result -def : Pat<(parallel (X86add_flag GR8:$src1, GR8:$src2), - (implicit EFLAGS)), - (ADD8rr GR8:$src1, GR8:$src2)>; -def : Pat<(parallel (X86add_flag GR16:$src1, GR16:$src2), - (implicit EFLAGS)), - (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel 
(X86add_flag GR32:$src1, GR32:$src2), - (implicit EFLAGS)), - (ADD32rr GR32:$src1, GR32:$src2)>; +// add reg, reg +def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; -// Register-Memory Addition with EFLAGS result -def : Pat<(parallel (X86add_flag GR8:$src1, (loadi8 addr:$src2)), - (implicit EFLAGS)), +// add reg, mem +def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(parallel (X86add_flag GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86add_flag GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), (ADD32rm GR32:$src1, addr:$src2)>; -// Register-Integer Addition with EFLAGS result -def : Pat<(parallel (X86add_flag GR8:$src1, imm:$src2), - (implicit EFLAGS)), - (ADD8ri GR8:$src1, imm:$src2)>; -def : Pat<(parallel (X86add_flag GR16:$src1, imm:$src2), - (implicit EFLAGS)), - (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86add_flag GR32:$src1, imm:$src2), - (implicit EFLAGS)), - (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86add_flag GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)), +// add reg, imm +def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; +def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; +def : Pat<(add GR16:$src1, i16immSExt8:$src2), (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(add GR32:$src1, i32immSExt8:$src2), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Register-Register Subtraction with EFLAGS result -def : Pat<(parallel 
(X86sub_flag GR8:$src1, GR8:$src2), - (implicit EFLAGS)), - (SUB8rr GR8:$src1, GR8:$src2)>; -def : Pat<(parallel (X86sub_flag GR16:$src1, GR16:$src2), - (implicit EFLAGS)), - (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel (X86sub_flag GR32:$src1, GR32:$src2), - (implicit EFLAGS)), - (SUB32rr GR32:$src1, GR32:$src2)>; +// sub reg, reg +def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; +def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; -// Register-Memory Subtraction with EFLAGS result -def : Pat<(parallel (X86sub_flag GR8:$src1, (loadi8 addr:$src2)), - (implicit EFLAGS)), +// sub reg, mem +def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(parallel (X86sub_flag GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86sub_flag GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), (SUB32rm GR32:$src1, addr:$src2)>; -// Register-Integer Subtraction with EFLAGS result -def : Pat<(parallel (X86sub_flag GR8:$src1, imm:$src2), - (implicit EFLAGS)), +// sub reg, imm +def : Pat<(sub GR8:$src1, imm:$src2), (SUB8ri GR8:$src1, imm:$src2)>; -def : Pat<(parallel (X86sub_flag GR16:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(sub GR16:$src1, imm:$src2), (SUB16ri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86sub_flag GR32:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(sub GR32:$src1, imm:$src2), (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86sub_flag GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(sub GR16:$src1, i16immSExt8:$src2), (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(sub GR32:$src1, 
i32immSExt8:$src2), (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Register-Register Signed Integer Multiply with EFLAGS result -def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2), - (implicit EFLAGS)), +// mul reg, reg +def : Pat<(mul GR16:$src1, GR16:$src2), (IMUL16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel (X86smul_flag GR32:$src1, GR32:$src2), - (implicit EFLAGS)), +def : Pat<(mul GR32:$src1, GR32:$src2), (IMUL32rr GR32:$src1, GR32:$src2)>; -// Register-Memory Signed Integer Multiply with EFLAGS result -def : Pat<(parallel (X86smul_flag GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)), +// mul reg, mem +def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), (IMUL16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86smul_flag GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), (IMUL32rm GR32:$src1, addr:$src2)>; -// Register-Integer Signed Integer Multiply with EFLAGS result -def : Pat<(parallel (X86smul_flag GR16:$src1, imm:$src2), - (implicit EFLAGS)), +// mul reg, imm +def : Pat<(mul GR16:$src1, imm:$src2), (IMUL16rri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86smul_flag GR32:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(mul GR32:$src1, imm:$src2), (IMUL32rri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86smul_flag GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(mul GR16:$src1, i16immSExt8:$src2), (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86smul_flag GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(mul GR32:$src1, i32immSExt8:$src2), (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Integer Signed Integer Multiply with EFLAGS result -def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), imm:$src2), - (implicit EFLAGS)), +// reg = mul mem, imm +def : Pat<(mul (loadi16 addr:$src1), imm:$src2), (IMUL16rmi addr:$src1, imm:$src2)>; -def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), imm:$src2), - 
(implicit EFLAGS)), +def : Pat<(mul (loadi32 addr:$src1), imm:$src2), (IMUL32rmi addr:$src1, imm:$src2)>; -def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2), (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2), (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; // Optimize multiply by 2 with EFLAGS result. let AddedComplexity = 2 in { -def : Pat<(parallel (X86smul_flag GR16:$src1, 2), - (implicit EFLAGS)), - (ADD16rr GR16:$src1, GR16:$src1)>; - -def : Pat<(parallel (X86smul_flag GR32:$src1, 2), - (implicit EFLAGS)), - (ADD32rr GR32:$src1, GR32:$src1)>; +def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>; +def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>; } -// INC and DEC with EFLAGS result. Note that these do not set CF. 
-def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)), - (INC8r GR8:$src)>; -def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)), - (DEC8r GR8:$src)>; - -def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), - (INC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), - (DEC16r GR16:$src)>, Requires<[In32BitMode]>; - -def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), - (INC32r GR32:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), - (DEC32r GR32:$src)>, Requires<[In32BitMode]>; - -// Register-Register Or with EFLAGS result -def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2), - (implicit EFLAGS)), - (OR8rr GR8:$src1, GR8:$src2)>; -def : Pat<(parallel (X86or_flag GR16:$src1, GR16:$src2), - (implicit EFLAGS)), - (OR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel (X86or_flag GR32:$src1, GR32:$src2), - (implicit EFLAGS)), - (OR32rr GR32:$src1, GR32:$src2)>; - -// Register-Memory Or with EFLAGS result -def : Pat<(parallel (X86or_flag GR8:$src1, (loadi8 addr:$src2)), - (implicit EFLAGS)), +// Patterns for nodes that do not produce flags, for instructions that do. + +// Increment reg. +def : Pat<(add GR8:$src , 1), (INC8r GR8:$src)>; +def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>; + +// Decrement reg. +def : Pat<(add GR8:$src , -1), (DEC8r GR8:$src)>; +def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; + +// or reg/reg. 
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; + +// or reg/mem +def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), (OR8rm GR8:$src1, addr:$src2)>; -def : Pat<(parallel (X86or_flag GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), (OR16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86or_flag GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), (OR32rm GR32:$src1, addr:$src2)>; -// Register-Integer Or with EFLAGS result -def : Pat<(parallel (X86or_flag GR8:$src1, imm:$src2), - (implicit EFLAGS)), - (OR8ri GR8:$src1, imm:$src2)>; -def : Pat<(parallel (X86or_flag GR16:$src1, imm:$src2), - (implicit EFLAGS)), - (OR16ri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86or_flag GR32:$src1, imm:$src2), - (implicit EFLAGS)), - (OR32ri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86or_flag GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)), +// or reg/imm +def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; +def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; +def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; +def : Pat<(or GR16:$src1, i16immSExt8:$src2), (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(or GR32:$src1, i32immSExt8:$src2), (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Register-Register XOr with EFLAGS result -def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2), - (implicit EFLAGS)), - (XOR8rr GR8:$src1, GR8:$src2)>; -def : Pat<(parallel (X86xor_flag GR16:$src1, GR16:$src2), - (implicit EFLAGS)), - (XOR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(parallel (X86xor_flag GR32:$src1, GR32:$src2), - (implicit EFLAGS)), - (XOR32rr 
GR32:$src1, GR32:$src2)>; - -// Register-Memory XOr with EFLAGS result -def : Pat<(parallel (X86xor_flag GR8:$src1, (loadi8 addr:$src2)), - (implicit EFLAGS)), +// xor reg/reg +def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; + +// xor reg/mem +def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), (XOR8rm GR8:$src1, addr:$src2)>; -def : Pat<(parallel (X86xor_flag GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), (XOR16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86xor_flag GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), (XOR32rm GR32:$src1, addr:$src2)>; -// Register-Integer XOr with EFLAGS result -def : Pat<(parallel (X86xor_flag GR8:$src1, imm:$src2), - (implicit EFLAGS)), +// xor reg/imm +def : Pat<(xor GR8:$src1, imm:$src2), (XOR8ri GR8:$src1, imm:$src2)>; -def : Pat<(parallel (X86xor_flag GR16:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(xor GR16:$src1, imm:$src2), (XOR16ri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86xor_flag GR32:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(xor GR32:$src1, imm:$src2), (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86xor_flag GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(xor GR16:$src1, i16immSExt8:$src2), (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(xor GR32:$src1, i32immSExt8:$src2), (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Register-Register And with EFLAGS result -def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2), - (implicit EFLAGS)), - (AND8rr GR8:$src1, GR8:$src2)>; -def : Pat<(parallel (X86and_flag GR16:$src1, GR16:$src2), - (implicit EFLAGS)), - (AND16rr GR16:$src1, 
GR16:$src2)>; -def : Pat<(parallel (X86and_flag GR32:$src1, GR32:$src2), - (implicit EFLAGS)), - (AND32rr GR32:$src1, GR32:$src2)>; - -// Register-Memory And with EFLAGS result -def : Pat<(parallel (X86and_flag GR8:$src1, (loadi8 addr:$src2)), - (implicit EFLAGS)), +// and reg/reg +def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; +def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; + +// and reg/mem +def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), (AND8rm GR8:$src1, addr:$src2)>; -def : Pat<(parallel (X86and_flag GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), (AND16rm GR16:$src1, addr:$src2)>; -def : Pat<(parallel (X86and_flag GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)), +def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), (AND32rm GR32:$src1, addr:$src2)>; -// Register-Integer And with EFLAGS result -def : Pat<(parallel (X86and_flag GR8:$src1, imm:$src2), - (implicit EFLAGS)), +// and reg/imm +def : Pat<(and GR8:$src1, imm:$src2), (AND8ri GR8:$src1, imm:$src2)>; -def : Pat<(parallel (X86and_flag GR16:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(and GR16:$src1, imm:$src2), (AND16ri GR16:$src1, imm:$src2)>; -def : Pat<(parallel (X86and_flag GR32:$src1, imm:$src2), - (implicit EFLAGS)), +def : Pat<(and GR32:$src1, imm:$src2), (AND32ri GR32:$src1, imm:$src2)>; -def : Pat<(parallel (X86and_flag GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(and GR16:$src1, i16immSExt8:$src2), (AND16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)), +def : Pat<(and GR32:$src1, i32immSExt8:$src2), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; // -disable-16bit support. 
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index e1203e2..1c81c5e 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -605,22 +605,9 @@ let AddedComplexity = 10 in { def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), VR64:$src2)), (MMX_PANDNrr VR64:$src1, VR64:$src2)>; -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))), - VR64:$src2)), - (MMX_PANDNrr VR64:$src1, VR64:$src2)>; -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))), - VR64:$src2)), - (MMX_PANDNrr VR64:$src1, VR64:$src2)>; - def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), (load addr:$src2))), (MMX_PANDNrm VR64:$src1, addr:$src2)>; -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))), - (load addr:$src2))), - (MMX_PANDNrm VR64:$src1, addr:$src2)>; -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))), - (load addr:$src2))), - (MMX_PANDNrm VR64:$src1, addr:$src2)>; // Move MMX to lower 64-bit of XMM def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 720b663..dadc2a6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -69,8 +69,9 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; -def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>, - SDTCisVT<1, v4f32>]>; +def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, v4f32>, + SDTCisVT<2, v4f32>]>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; //===----------------------------------------------------------------------===// @@ -1114,15 +1115,19 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), // load of an all-zeros value if folding it would be beneficial. 
// FIXME: Change encoding to pseudo! let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1 in -def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + isCodeGenOnly = 1 in { +def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4f32 immAllZerosV))]>; +def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v2f64 immAllZerosV))]>; +let ExeDomain = SSEPackedInt in +def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; +} -def : Pat<(v2i64 immAllZerosV), (V_SET0)>; -def : Pat<(v8i16 immAllZerosV), (V_SET0)>; -def : Pat<(v16i8 immAllZerosV), (V_SET0)>; -def : Pat<(v2f64 immAllZerosV), (V_SET0)>; -def : Pat<(v4f32 immAllZerosV), (V_SET0)>; +def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; @@ -1935,6 +1940,7 @@ let Constraints = "$src1 = $dst" in { //===---------------------------------------------------------------------===// // SSE integer instructions +let ExeDomain = SSEPackedInt in { // Move Instructions let neverHasSideEffects = 1 in @@ -2043,6 +2049,7 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, } } // Constraints = "$src1 = $dst" +} // ExeDomain = SSEPackedInt // 128-bit Integer Arithmetic @@ -2105,7 +2112,8 @@ defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d, int_x86_sse2_psrai_d>; // 128-bit logical shifts. 
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { +let Constraints = "$src1 = $dst", neverHasSideEffects = 1, + ExeDomain = SSEPackedInt in { def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "pslldq\t{$src2, $dst|$dst, $src2}", []>; @@ -2139,7 +2147,7 @@ defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>; defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>; -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in { def PANDNrr : PDI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "pandn\t{$src2, $dst|$dst, $src2}", @@ -2193,6 +2201,8 @@ defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>; defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>; defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; +let ExeDomain = SSEPackedInt in { + // Shuffle and unpack instructions let AddedComplexity = 5 in { def PSHUFDri : PDIi8<0x70, MRMSrcReg, @@ -2369,10 +2379,13 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; +} // ExeDomain = SSEPackedInt + // Non-temporal stores def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movntpd\t{$src, $dst|$dst, $src}", [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; +let ExeDomain = SSEPackedInt in def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; @@ -2386,6 +2399,7 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; +let ExeDomain = SSEPackedInt in def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), 
(ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; @@ -2414,7 +2428,7 @@ def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1 in + isCodeGenOnly = 1, ExeDomain = SSEPackedInt in // FIXME: Change encoding to pseudo. def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; @@ -3016,14 +3030,14 @@ let Predicates = [HasSSE2] in { let AddedComplexity = 15 in { // Zeroing a VR128 then do a MOVS{S|D} to the lower bits. def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; + (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), + (MOVSSrr (v4f32 (V_SET0PS)), (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), + (MOVSSrr (v4i32 (V_SET0PI)), (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>; } @@ -3181,9 +3195,6 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), (SHUFFLE_get_shuf_imm VR128:$src3))>; // Set lowest element and zero upper elements. 
-let AddedComplexity = 15 in -def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)), - (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; @@ -3441,8 +3452,28 @@ let Constraints = "$src1 = $dst" in { OpSize; } } -defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul, - int_x86_sse41_pmulld, 1>; + +/// SS48I_binop_rm - Simple SSE41 binary operator. +let Constraints = "$src1 = $dst" in { +multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, bit Commutable = 0> { + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, + OpSize { + let isCommutable = Commutable; + } + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (OpNode VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2))))]>, + OpSize; +} +} + +defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>; /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate let Constraints = "$src1 = $dst" in { @@ -3772,12 +3803,12 @@ def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest \t{$src2, $src1|$src1, $src2}", - [(X86ptest VR128:$src1, VR128:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), "ptest \t{$src2, $src1|$src1, $src2}", - [(X86ptest VR128:$src1, (load addr:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, + OpSize; } def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins 
i128mem:$src), @@ -3817,6 +3848,53 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; +// TODO: These should be AES as a feature set. +defm AESIMC : SS42I_binop_rm_int<0xDB, "aesimc", + int_x86_aesni_aesimc>; +defm AESENC : SS42I_binop_rm_int<0xDC, "aesenc", + int_x86_aesni_aesenc>; +defm AESENCLAST : SS42I_binop_rm_int<0xDD, "aesenclast", + int_x86_aesni_aesenclast>; +defm AESDEC : SS42I_binop_rm_int<0xDE, "aesdec", + int_x86_aesni_aesdec>; +defm AESDECLAST : SS42I_binop_rm_int<0xDF, "aesdeclast", + int_x86_aesni_aesdeclast>; + +def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)), + (AESIMCrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))), + (AESIMCrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), + (AESENCrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), + (AESENCrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), + (AESENCLASTrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), + (AESENCLASTrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), + (AESDECrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), + (AESDECrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), + (AESDECLASTrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), + (AESDECLASTrm VR128:$src1, addr:$src2)>; + +def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + 
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + OpSize; +def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1, i32i8imm:$src2), + "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), + imm:$src2))]>, + OpSize; + // crc intrinsic instruction // This set of instructions are only rm, the only difference is the size // of r and m. diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index cd56816..8a0cde4 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -266,6 +266,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned Model = 0; DetectFamilyModel(EAX, Family, Model); IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13); + // If it's Nehalem, unaligned memory access is fast. + if (Family == 15 && Model == 26) + IsUAMemFast = true; GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); HasX86_64 = (EDX >> 29) & 0x1; @@ -286,6 +289,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) + , IsUAMemFast(false) , HasVectorUAMem(false) , DarwinVers(0) , stackAlignment(8) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 56220db..bf30154 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -78,6 +78,9 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; + /// IsUAMemFast - True if unaligned memory access is fast. + bool IsUAMemFast; + /// HasVectorUAMem - True if SIMD operations can have unaligned memory /// operands. This may require setting a feature bit in the /// processor. 
@@ -148,6 +151,7 @@ public: bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } + bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetType == isDarwin; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f13e6f3..c608e56 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -17,6 +17,7 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" @@ -169,6 +170,15 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, return true; // -print-machineinstr should print after this. } +bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { + PM.add(createSSEDomainFixPass()); + return true; + } + return false; +} + bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 2bb5454..ae7b5b2 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -66,6 +66,7 @@ public: virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); }; diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp 
b/lib/Target/XCore/XCoreInstrInfo.cpp index e5f5a6d..54df33c 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -215,7 +215,15 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) return false; // Get the last instruction in the block. @@ -326,6 +334,11 @@ XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode())) return 0; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 2e9a1e5..dd3cbc1 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -32,7 +32,7 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, SDNPVariadic]>; -def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone, +def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, [SDNPHasChain, SDNPOptInFlag]>; def SDT_XCoreBR_JT : SDTypeProfile<0, 2, diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 7cb1367..40a87e8 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -623,6 +623,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, SmallVector<Value*, 16> Args; while (!F->use_empty()) { CallSite CS = CallSite::get(F->use_back()); + assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttrListPtr 
&CallPAL = CS.getAttributes(); @@ -660,7 +661,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[I]; // Store the Value* version of the indices in here, but declare it now - // for reuse + // for reuse. std::vector<Value*> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { @@ -677,16 +678,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); - // Keep track of the type we're currently indexing + // Keep track of the type we're currently indexing. ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } - // And create a GEP to extract those indices + // And create a GEP to extract those indices. V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), V->getName()+".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } - Args.push_back(new LoadInst(V, V->getName()+".val", Call)); + // Since we're replacing a load make sure we take the alignment + // of the previous load. + LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); + newLoad->setAlignment(OrigLoad->getAlignment()); + Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } } @@ -694,7 +699,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, if (ExtraArgHack) Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); - // Push any varargs arguments on the list + // Push any varargs arguments on the list. 
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index f386ed7..227602d 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -50,7 +50,7 @@ namespace { /// argument. Used so that arguments and return values can be used /// interchangably. struct RetOrArg { - RetOrArg(const Function* F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), + RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), IsArg(IsArg) {} const Function *F; unsigned Idx; @@ -72,7 +72,7 @@ namespace { } std::string getDescription() const { - return std::string((IsArg ? "Argument #" : "Return value #")) + return std::string((IsArg ? "Argument #" : "Return value #")) + utostr(Idx) + " of function " + F->getNameStr(); } }; @@ -129,11 +129,11 @@ namespace { private: Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses); - Liveness SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses, + Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses, unsigned RetValNum = 0); - Liveness SurveyUses(Value *V, UseVector &MaybeLiveUses); + Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); - void SurveyFunction(Function &F); + void SurveyFunction(const Function &F); void MarkValue(const RetOrArg &RA, Liveness L, const UseVector &MaybeLiveUses); void MarkLive(const RetOrArg &RA); @@ -196,7 +196,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. 
const FunctionType *FTy = Fn.getFunctionType(); - + std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); @@ -225,7 +225,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { SmallVector<AttributeWithIndex, 8> AttributesVec; for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i) AttributesVec.push_back(PAL.getSlot(i)); - if (Attributes FnAttrs = PAL.getFnAttributes()) + if (Attributes FnAttrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); } @@ -280,7 +280,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// for void functions and 1 for functions not returning a struct. It returns /// the number of struct elements for functions returning a struct. static unsigned NumRetVals(const Function *F) { - if (F->getReturnType() == Type::getVoidTy(F->getContext())) + if (F->getReturnType()->isVoidTy()) return 0; else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) return STy->getNumElements(); @@ -305,15 +305,15 @@ DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) { /// SurveyUse - This looks at a single use of an argument or return value /// and determines if it should be alive or not. Adds this use to MaybeLiveUses -/// if it causes the used value to become MaybeAlive. +/// if it causes the used value to become MaybeLive. /// /// RetValNum is the return value number to use when this use is used in a /// return instruction. This is used in the recursion, you should always leave /// it at 0. 
-DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses, - unsigned RetValNum) { - Value *V = *U; - if (ReturnInst *RI = dyn_cast<ReturnInst>(V)) { +DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, + UseVector &MaybeLiveUses, unsigned RetValNum) { + const User *V = *U; + if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) { // The value is returned from a function. It's only live when the // function's return value is live. We use RetValNum here, for the case // that U is really a use of an insertvalue instruction that uses the @@ -322,7 +322,7 @@ DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses, // We might be live, depending on the liveness of Use. return MarkIfNotLive(Use, MaybeLiveUses); } - if (InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) { + if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) { if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex() && IV->hasIndices()) // The use we are examining is inserted into an aggregate. Our liveness @@ -334,7 +334,7 @@ DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses, // we don't change RetValNum, but do survey all our uses. Liveness Result = MaybeLive; - for (Value::use_iterator I = IV->use_begin(), + for (Value::const_use_iterator I = IV->use_begin(), E = V->use_end(); I != E; ++I) { Result = SurveyUse(I, MaybeLiveUses, RetValNum); if (Result == Live) @@ -342,24 +342,24 @@ DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses, } return Result; } - CallSite CS = CallSite::get(V); - if (CS.getInstruction()) { - Function *F = CS.getCalledFunction(); + + if (ImmutableCallSite CS = V) { + const Function *F = CS.getCalledFunction(); if (F) { // Used in a direct call. - + // Find the argument number. 
We know for sure that this use is an // argument, since if it was the function argument this would be an // indirect call and the we know can't be looking at a value of the // label type (for the invoke instruction). - unsigned ArgNo = CS.getArgumentNo(U.getOperandNo()); + unsigned ArgNo = CS.getArgumentNo(U); if (ArgNo >= F->getFunctionType()->getNumParams()) // The value is passed in through a vararg! Must be live. return Live; - assert(CS.getArgument(ArgNo) - == CS.getInstruction()->getOperand(U.getOperandNo()) + assert(CS.getArgument(ArgNo) + == CS->getOperand(U.getOperandNo()) && "Argument is not where we expected it"); // Value passed to a normal call. It's only live when the corresponding @@ -378,11 +378,11 @@ DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses, /// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If /// the result is Live, MaybeLiveUses might be modified but its content should /// be ignored (since it might not be complete). -DAE::Liveness DAE::SurveyUses(Value *V, UseVector &MaybeLiveUses) { +DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) { // Assume it's dead (which will only hold if there are no uses at all..). Liveness Result = MaybeLive; // Check each use. - for (Value::use_iterator I = V->use_begin(), + for (Value::const_use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { Result = SurveyUse(I, MaybeLiveUses); if (Result == Live) @@ -399,7 +399,7 @@ DAE::Liveness DAE::SurveyUses(Value *V, UseVector &MaybeLiveUses) { // We consider arguments of non-internal functions to be intrinsically alive as // well as arguments to functions which have their "address taken". // -void DAE::SurveyFunction(Function &F) { +void DAE::SurveyFunction(const Function &F) { unsigned RetCount = NumRetVals(&F); // Assume all return values are dead typedef SmallVector<Liveness, 5> RetVals; @@ -411,8 +411,8 @@ void DAE::SurveyFunction(Function &F) { // MaybeLive. 
Initialized to a list of RetCount empty lists. RetUses MaybeLiveRetUses(RetCount); - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType() != F.getFunctionType()->getReturnType()) { // We don't support old style multiple return values. @@ -431,17 +431,18 @@ void DAE::SurveyFunction(Function &F) { unsigned NumLiveRetVals = 0; const Type *STy = dyn_cast<StructType>(F.getReturnType()); // Loop all uses of the function. - for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) { + for (Value::const_use_iterator I = F.use_begin(), E = F.use_end(); + I != E; ++I) { // If the function is PASSED IN as an argument, its address has been // taken. - CallSite CS = CallSite::get(*I); - if (!CS.getInstruction() || !CS.isCallee(I)) { + ImmutableCallSite CS(*I); + if (!CS || !CS.isCallee(I)) { MarkLive(F); return; } // If this use is anything other than a call site, the function is alive. - Instruction *TheCall = CS.getInstruction(); + const Instruction *TheCall = CS.getInstruction(); if (!TheCall) { // Not a direct call site? MarkLive(F); return; @@ -454,9 +455,9 @@ void DAE::SurveyFunction(Function &F) { if (NumLiveRetVals != RetCount) { if (STy) { // Check all uses of the return value. - for (Value::use_iterator I = TheCall->use_begin(), + for (Value::const_use_iterator I = TheCall->use_begin(), E = TheCall->use_end(); I != E; ++I) { - ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I); + const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I); if (Ext && Ext->hasIndices()) { // This use uses a part of our return value, survey the uses of // that part and store the results for this index only. 
@@ -493,7 +494,7 @@ void DAE::SurveyFunction(Function &F) { // Now, check all of our arguments. unsigned i = 0; UseVector MaybeLiveArgUses; - for (Function::arg_iterator AI = F.arg_begin(), + for (Function::const_arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E; ++AI, ++i) { // See what the effect of this use is (recording any uses that cause // MaybeLive in MaybeLiveArgUses). @@ -599,12 +600,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { const Type *RetTy = FTy->getReturnType(); const Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); - + // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); std::vector<const Type*> RetTypes; - if (RetTy == Type::getVoidTy(F->getContext())) { - NRetTy = Type::getVoidTy(F->getContext()); + if (RetTy->isVoidTy()) { + NRetTy = RetTy; } else { const StructType *STy = dyn_cast<StructType>(RetTy); if (STy) @@ -653,10 +654,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // values. Otherwise, ensure that we don't have any conflicting attributes // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. - if (NRetTy == Type::getVoidTy(F->getContext())) + if (NRetTy->isVoidTy()) RAttrs &= ~Attribute::typeIncompatible(NRetTy); else - assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 + assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 && "Return attributes no longer compatible?"); if (RAttrs) @@ -686,11 +687,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } } - if (FnAttrs != Attribute::None) + if (FnAttrs != Attribute::None) AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. 
- AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); + AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end()); // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which // have zero fixed arguments. @@ -705,8 +707,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } // Create the new function type based on the recomputed parameters. - FunctionType *NFTy = FunctionType::get(NRetTy, Params, - FTy->isVarArg()); + FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); // No change? if (NFTy == FTy) @@ -791,7 +792,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Return type not changed? Just replace users then. Call->replaceAllUsesWith(New); New->takeName(Call); - } else if (New->getType() == Type::getVoidTy(F->getContext())) { + } else if (New->getType()->isVoidTy()) { // Our return value has uses, but they will get removed later on. // Replace by null for now. Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); @@ -805,7 +806,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { while (isa<PHINode>(IP)) ++IP; InsertPt = IP; } - + // We used to return a struct. Instead of doing smart stuff with all the // uses of this struct, we will just rebuild it using // extract/insertvalue chaining and let instcombine clean that up. @@ -929,11 +930,11 @@ bool DAE::runOnModule(Module &M) { DEBUG(dbgs() << "DAE - Determining liveness\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) SurveyFunction(*I); - + // Now, remove all dead arguments and return values from each function in - // turn + // turn. for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { - // Increment now, because the function will probably get removed (ie + // Increment now, because the function will probably get removed (ie. // replaced by a new one). 
Function *F = I++; Changed |= RemoveDeadStuffFromFunction(F); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index d8e97a2..ddff5ef 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -119,7 +119,7 @@ struct GlobalStatus { /// null/false. When the first accessing function is noticed, it is recorded. /// When a second different accessing function is noticed, /// HasMultipleAccessingFunctions is set to true. - Function *AccessingFunction; + const Function *AccessingFunction; bool HasMultipleAccessingFunctions; /// HasNonInstructionUser - Set to true if this global has a user that is not @@ -140,11 +140,11 @@ struct GlobalStatus { // by constants itself. Note that constants cannot be cyclic, so this test is // pretty easy to implement recursively. // -static bool SafeToDestroyConstant(Constant *C) { +static bool SafeToDestroyConstant(const Constant *C) { if (isa<GlobalValue>(C)) return false; - for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) - if (Constant *CU = dyn_cast<Constant>(*UI)) { + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) + if (const Constant *CU = dyn_cast<Constant>(*UI)) { if (!SafeToDestroyConstant(CU)) return false; } else return false; @@ -156,26 +156,26 @@ static bool SafeToDestroyConstant(Constant *C) { /// structure. If the global has its address taken, return true to indicate we /// can't do anything with it. 
/// -static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, - SmallPtrSet<PHINode*, 16> &PHIUsers) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { +static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, + SmallPtrSet<const PHINode*, 16> &PHIUsers) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { GS.HasNonInstructionUser = true; if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; - } else if (Instruction *I = dyn_cast<Instruction>(*UI)) { + } else if (const Instruction *I = dyn_cast<Instruction>(*UI)) { if (!GS.HasMultipleAccessingFunctions) { - Function *F = I->getParent()->getParent(); + const Function *F = I->getParent()->getParent(); if (GS.AccessingFunction == 0) GS.AccessingFunction = F; else if (GS.AccessingFunction != F) GS.HasMultipleAccessingFunctions = true; } - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { GS.isLoaded = true; if (LI->isVolatile()) return true; // Don't hack on volatile loads. - } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) { // Don't allow a store OF the address, only stores TO the address. if (SI->getOperand(0) == V) return true; @@ -185,14 +185,13 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, // value, not an aggregate), keep more specific information about // stores. 
if (GS.StoredType != GlobalStatus::isStored) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){ + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){ Value *StoredVal = SI->getOperand(0); if (StoredVal == GV->getInitializer()) { if (GS.StoredType < GlobalStatus::isInitializerStored) GS.StoredType = GlobalStatus::isInitializerStored; } else if (isa<LoadInst>(StoredVal) && cast<LoadInst>(StoredVal)->getOperand(0) == GV) { - // G = G if (GS.StoredType < GlobalStatus::isInitializerStored) GS.StoredType = GlobalStatus::isInitializerStored; } else if (GS.StoredType < GlobalStatus::isStoredOnce) { @@ -212,7 +211,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; } else if (isa<SelectInst>(I)) { if (AnalyzeGlobal(I, GS, PHIUsers)) return true; - } else if (PHINode *PN = dyn_cast<PHINode>(I)) { + } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { // PHI nodes we can check just like select or GEP instructions, but we // have to be careful about infinite recursion. if (PHIUsers.insert(PN)) // Not already visited. @@ -230,7 +229,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, } else { return true; // Any other non-load instruction might take address! } - } else if (Constant *C = dyn_cast<Constant>(*UI)) { + } else if (const Constant *C = dyn_cast<Constant>(*UI)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. if (!SafeToDestroyConstant(C)) @@ -1029,23 +1028,23 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, /// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi /// of a load) are simple enough to perform heap SRA on. This permits GEP's /// that index through the array and struct field, icmps of null, and PHIs. 
-static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, - SmallPtrSet<PHINode*, 32> &LoadUsingPHIs, - SmallPtrSet<PHINode*, 32> &LoadUsingPHIsPerLoad) { +static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, + SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, + SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { // We permit two users of the load: setcc comparing against the null // pointer, and a getelementptr of a specific form. - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *User = cast<Instruction>(*UI); + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + const Instruction *User = cast<Instruction>(*UI); // Comparison against null is ok. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) return false; continue; } // getelementptr is also ok, but only a simple form. - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { + if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { // Must index into the array and into the struct. if (GEPI->getNumOperands() < 3) return false; @@ -1054,7 +1053,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, continue; } - if (PHINode *PN = dyn_cast<PHINode>(User)) { + if (const PHINode *PN = dyn_cast<PHINode>(User)) { if (!LoadUsingPHIsPerLoad.insert(PN)) // This means some phi nodes are dependent on each other. // Avoid infinite looping! @@ -1081,13 +1080,13 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from /// GV are simple enough to perform HeapSRA, return true. 
-static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, +static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { - SmallPtrSet<PHINode*, 32> LoadUsingPHIs; - SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad; - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; + SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; + SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; ++UI) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, LoadUsingPHIsPerLoad)) return false; @@ -1099,16 +1098,16 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. - for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(), + for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin(), E = LoadUsingPHIs.end(); I != E; ++I) { - PHINode *PN = *I; + const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); // PHI of the stored value itself is ok. if (InVal == StoredVal) continue; - if (PHINode *InPN = dyn_cast<PHINode>(InVal)) { + if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) { // One of the PHIs in our set is (optimistically) ok. if (LoadUsingPHIs.count(InPN)) continue; @@ -1116,7 +1115,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, } // Load from GV is ok. 
- if (LoadInst *LI = dyn_cast<LoadInst>(InVal)) + if (const LoadInst *LI = dyn_cast<LoadInst>(InVal)) if (LI->getOperand(0) == GV) continue; @@ -1664,7 +1663,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { /// it if possible. If we make a change, return true. bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI) { - SmallPtrSet<PHINode*, 16> PHIUsers; + SmallPtrSet<const PHINode*, 16> PHIUsers; GlobalStatus GS; GV->removeDeadConstantUsers(); @@ -1715,12 +1714,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GS.AccessingFunction->hasExternalLinkage() && GV->getType()->getAddressSpace() == 0) { DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); - Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); + Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction + ->getEntryBlock().begin()); const Type* ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment - AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI); + AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); if (!isa<UndefValue>(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, FirstI); + new StoreInst(GV->getInitializer(), Alloca, &FirstI); GV->replaceAllUsesWith(Alloca); GV->eraseFromParent(); diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 3ae771c..161246b 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -168,7 +168,7 @@ bool PruneEH::SimplifyFunction(Function *F) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) if (II->doesNotThrow()) { - SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end()); + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); // Insert a call instruction before the invoke. 
CallInst *Call = CallInst::Create(II->getCalledValue(), Args.begin(), Args.end(), "", II); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 65f2e15..76c815d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -766,7 +766,7 @@ protected: return SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(SizeArgOp)); if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp))) - return SizeCI->getZExtValue() <= Arg->getZExtValue(); + return SizeCI->getZExtValue() >= Arg->getZExtValue(); } return false; } diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp index ea8e5c3..6135992 100644 --- a/lib/Transforms/Scalar/ABCD.cpp +++ b/lib/Transforms/Scalar/ABCD.cpp @@ -27,6 +27,7 @@ #define DEBUG_TYPE "abcd" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" @@ -77,10 +78,10 @@ class ABCD : public FunctionPass { class Bound { public: Bound(APInt v, bool upper) : value(v), upper_bound(upper) {} - Bound(const Bound *b, int cnst) - : value(b->value - cnst), upper_bound(b->upper_bound) {} - Bound(const Bound *b, const APInt &cnst) - : value(b->value - cnst), upper_bound(b->upper_bound) {} + Bound(const Bound &b, int cnst) + : value(b.value - cnst), upper_bound(b.upper_bound) {} + Bound(const Bound &b, const APInt &cnst) + : value(b.value - cnst), upper_bound(b.upper_bound) {} /// Test if Bound is an upper bound bool isUpperBound() const { return upper_bound; } @@ -89,15 +90,15 @@ class ABCD : public FunctionPass { int32_t getBitWidth() const { return value.getBitWidth(); } /// Creates a Bound incrementing the one received - static Bound *createIncrement(const Bound *b) { - return new Bound(b->isUpperBound() ? 
b->value+1 : b->value-1, - b->upper_bound); + static Bound createIncrement(const Bound &b) { + return Bound(b.isUpperBound() ? b.value+1 : b.value-1, + b.upper_bound); } /// Creates a Bound decrementing the one received - static Bound *createDecrement(const Bound *b) { - return new Bound(b->isUpperBound() ? b->value-1 : b->value+1, - b->upper_bound); + static Bound createDecrement(const Bound &b) { + return Bound(b.isUpperBound() ? b.value-1 : b.value+1, + b.upper_bound); } /// Test if two bounds are equal @@ -109,36 +110,31 @@ class ABCD : public FunctionPass { } /// Test if val is less than or equal to Bound b - static bool leq(APInt val, const Bound *b) { - if (!b) return false; - return b->isUpperBound() ? val.sle(b->value) : val.sge(b->value); + static bool leq(APInt val, const Bound &b) { + return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value); } /// Test if Bound a is less then or equal to Bound - static bool leq(const Bound *a, const Bound *b) { - if (!a || !b) return false; - - assert(a->isUpperBound() == b->isUpperBound()); - return a->isUpperBound() ? a->value.sle(b->value) : - a->value.sge(b->value); + static bool leq(const Bound &a, const Bound &b) { + assert(a.isUpperBound() == b.isUpperBound()); + return a.isUpperBound() ? a.value.sle(b.value) : + a.value.sge(b.value); } /// Test if Bound a is less then Bound b - static bool lt(const Bound *a, const Bound *b) { - if (!a || !b) return false; - - assert(a->isUpperBound() == b->isUpperBound()); - return a->isUpperBound() ? a->value.slt(b->value) : - a->value.sgt(b->value); + static bool lt(const Bound &a, const Bound &b) { + assert(a.isUpperBound() == b.isUpperBound()); + return a.isUpperBound() ? 
a.value.slt(b.value) : + a.value.sgt(b.value); } /// Test if Bound b is greater then or equal val - static bool geq(const Bound *b, APInt val) { + static bool geq(const Bound &b, APInt val) { return leq(val, b); } /// Test if Bound a is greater then or equal Bound b - static bool geq(const Bound *a, const Bound *b) { + static bool geq(const Bound &a, const Bound &b) { return leq(b, a); } @@ -152,29 +148,36 @@ class ABCD : public FunctionPass { /// minimum true and minimum reduced results are stored class MemoizedResultChart { public: - MemoizedResultChart() - : max_false(NULL), min_true(NULL), min_reduced(NULL) {} + MemoizedResultChart() {} + MemoizedResultChart(const MemoizedResultChart &other) { + if (other.max_false) + max_false.reset(new Bound(*other.max_false)); + if (other.min_true) + min_true.reset(new Bound(*other.min_true)); + if (other.min_reduced) + min_reduced.reset(new Bound(*other.min_reduced)); + } /// Returns the max false - Bound *getFalse() const { return max_false; } + const Bound *getFalse() const { return max_false.get(); } /// Returns the min true - Bound *getTrue() const { return min_true; } + const Bound *getTrue() const { return min_true.get(); } /// Returns the min reduced - Bound *getReduced() const { return min_reduced; } + const Bound *getReduced() const { return min_reduced.get(); } /// Return the stored result for this bound - ProveResult getResult(const Bound *bound) const; + ProveResult getResult(const Bound &bound) const; /// Stores a false found - void addFalse(Bound *bound); + void addFalse(const Bound &bound); /// Stores a true found - void addTrue(Bound *bound); + void addTrue(const Bound &bound); /// Stores a Reduced found - void addReduced(Bound *bound); + void addReduced(const Bound &bound); /// Clears redundant reduced /// If a min_true is smaller than a min_reduced then the min_reduced @@ -183,13 +186,13 @@ class ABCD : public FunctionPass { void clearRedundantReduced(); void clear() { - delete max_false; - delete 
min_true; - delete min_reduced; + max_false.reset(); + min_true.reset(); + min_reduced.reset(); } private: - Bound *max_false, *min_true, *min_reduced; + OwningPtr<Bound> max_false, min_true, min_reduced; }; /// This class stores the result found for a node of the graph, @@ -198,27 +201,27 @@ class ABCD : public FunctionPass { public: /// Test if there is true result stored from b to a /// that is less then the bound - bool hasTrue(Value *b, const Bound *bound) const { - Bound *trueBound = map.lookup(b).getTrue(); - return trueBound && Bound::leq(trueBound, bound); + bool hasTrue(Value *b, const Bound &bound) const { + const Bound *trueBound = map.lookup(b).getTrue(); + return trueBound && Bound::leq(*trueBound, bound); } /// Test if there is false result stored from b to a /// that is less then the bound - bool hasFalse(Value *b, const Bound *bound) const { - Bound *falseBound = map.lookup(b).getFalse(); - return falseBound && Bound::leq(falseBound, bound); + bool hasFalse(Value *b, const Bound &bound) const { + const Bound *falseBound = map.lookup(b).getFalse(); + return falseBound && Bound::leq(*falseBound, bound); } /// Test if there is reduced result stored from b to a /// that is less then the bound - bool hasReduced(Value *b, const Bound *bound) const { - Bound *reducedBound = map.lookup(b).getReduced(); - return reducedBound && Bound::leq(reducedBound, bound); + bool hasReduced(Value *b, const Bound &bound) const { + const Bound *reducedBound = map.lookup(b).getReduced(); + return reducedBound && Bound::leq(*reducedBound, bound); } /// Returns the stored bound for b - ProveResult getBoundResult(Value *b, Bound *bound) { + ProveResult getBoundResult(Value *b, const Bound &bound) { return map[b].getResult(bound); } @@ -233,7 +236,7 @@ class ABCD : public FunctionPass { } /// Stores the bound found - void updateBound(Value *b, Bound *bound, const ProveResult res); + void updateBound(Value *b, const Bound &bound, const ProveResult res); private: // Maps a nod 
in the graph with its results found. @@ -274,7 +277,7 @@ class ABCD : public FunctionPass { bool hasEdge(Value *V, bool upper) const; /// Returns all edges pointed by vertex V - SmallPtrSet<Edge *, 16> getEdges(Value *V) const { + SmallVector<Edge, 16> getEdges(Value *V) const { return graph.lookup(V); } @@ -292,13 +295,7 @@ class ABCD : public FunctionPass { } private: - DenseMap<Value *, SmallPtrSet<Edge *, 16> > graph; - - /// Adds a Node to the graph. - DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator addNode(Value *V) { - SmallPtrSet<Edge *, 16> p; - return graph.insert(std::make_pair(V, p)).first; - } + DenseMap<Value *, SmallVector<Edge, 16> > graph; /// Prints the header of the dot file void printHeader(raw_ostream &OS, Function &F) const; @@ -315,7 +312,7 @@ class ABCD : public FunctionPass { void printVertex(raw_ostream &OS, Value *source) const; /// Prints the edge to the dot file - void printEdge(raw_ostream &OS, Value *source, Edge *edge) const; + void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const; void printName(raw_ostream &OS, Value *info) const; }; @@ -428,15 +425,15 @@ class ABCD : public FunctionPass { bool demandProve(Value *a, Value *b, int c, bool upper_bound); /// Prove that distance between b and a is <= bound - ProveResult prove(Value *a, Value *b, Bound *bound, unsigned level); + ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level); /// Updates the distance value for a and b - void updateMemDistance(Value *a, Value *b, Bound *bound, unsigned level, + void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level, meet_function meet); InequalityGraph inequality_graph; MemoizedResult mem_result; - DenseMap<Value*, Bound*> active; + DenseMap<Value*, const Bound*> active; SmallPtrSet<Value*, 16> created; SmallVector<PHINode *, 16> phis_to_remove; }; @@ -857,7 +854,7 @@ PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) { /// This implementation works on any kind of inequality 
branch. bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) { int32_t width = cast<IntegerType>(a->getType())->getBitWidth(); - Bound *bound = new Bound(APInt(width, c), upper_bound); + Bound bound(APInt(width, c), upper_bound); mem_result.clear(); active.clear(); @@ -867,7 +864,7 @@ bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) { } /// Prove that distance between b and a is <= bound -ABCD::ProveResult ABCD::prove(Value *a, Value *b, Bound *bound, +ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound, unsigned level) { // if (C[b-a<=e] == True for some e <= bound // Same or stronger difference was already proven @@ -885,22 +882,22 @@ ABCD::ProveResult ABCD::prove(Value *a, Value *b, Bound *bound, return Reduced; // traversal reached the source vertex - if (a == b && Bound::geq(bound, APInt(bound->getBitWidth(), 0, true))) + if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true))) return True; // if b has no predecessor then fail - if (!inequality_graph.hasEdge(b, bound->isUpperBound())) + if (!inequality_graph.hasEdge(b, bound.isUpperBound())) return False; // a cycle was encountered if (active.count(b)) { - if (Bound::leq(active.lookup(b), bound)) + if (Bound::leq(*active.lookup(b), bound)) return Reduced; // a "harmless" cycle return False; // an amplifying cycle } - active[b] = bound; + active[b] = &bound; PHINode *PN = dyn_cast<PHINode>(b); // Test if a Value is a Phi. If it is a PHINode with more than 1 incoming @@ -917,23 +914,23 @@ ABCD::ProveResult ABCD::prove(Value *a, Value *b, Bound *bound, } /// Updates the distance value for a and b -void ABCD::updateMemDistance(Value *a, Value *b, Bound *bound, unsigned level, - meet_function meet) { +void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound, + unsigned level, meet_function meet) { ABCD::ProveResult res = (meet == max) ? 
False : True; - SmallPtrSet<Edge *, 16> Edges = inequality_graph.getEdges(b); - SmallPtrSet<Edge *, 16>::iterator begin = Edges.begin(), end = Edges.end(); + SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b); + SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end(); for (; begin != end ; ++begin) { if (((res >= Reduced) && (meet == max)) || ((res == False) && (meet == min))) { break; } - Edge *in = *begin; - if (in->isUpperBound() == bound->isUpperBound()) { - Value *succ = in->getVertex(); - res = meet(res, prove(a, succ, new Bound(bound, in->getValue()), - level+1)); + const Edge &in = *begin; + if (in.isUpperBound() == bound.isUpperBound()) { + Value *succ = in.getVertex(); + res = meet(res, prove(a, succ, Bound(bound, in.getValue()), + level+1)); } } @@ -941,53 +938,53 @@ void ABCD::updateMemDistance(Value *a, Value *b, Bound *bound, unsigned level, } /// Return the stored result for this bound -ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound *bound)const{ - if (max_false && Bound::leq(bound, max_false)) +ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{ + if (max_false && Bound::leq(bound, *max_false)) return False; - if (min_true && Bound::leq(min_true, bound)) + if (min_true && Bound::leq(*min_true, bound)) return True; - if (min_reduced && Bound::leq(min_reduced, bound)) + if (min_reduced && Bound::leq(*min_reduced, bound)) return Reduced; return False; } /// Stores a false found -void ABCD::MemoizedResultChart::addFalse(Bound *bound) { - if (!max_false || Bound::leq(max_false, bound)) - max_false = bound; - - if (Bound::eq(max_false, min_reduced)) - min_reduced = Bound::createIncrement(min_reduced); - if (Bound::eq(max_false, min_true)) - min_true = Bound::createIncrement(min_true); - if (Bound::eq(min_reduced, min_true)) - min_reduced = NULL; +void ABCD::MemoizedResultChart::addFalse(const Bound &bound) { + if (!max_false || Bound::leq(*max_false, bound)) + max_false.reset(new 
Bound(bound)); + + if (Bound::eq(max_false.get(), min_reduced.get())) + min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced))); + if (Bound::eq(max_false.get(), min_true.get())) + min_true.reset(new Bound(Bound::createIncrement(*min_true))); + if (Bound::eq(min_reduced.get(), min_true.get())) + min_reduced.reset(); clearRedundantReduced(); } /// Stores a true found -void ABCD::MemoizedResultChart::addTrue(Bound *bound) { - if (!min_true || Bound::leq(bound, min_true)) - min_true = bound; - - if (Bound::eq(min_true, min_reduced)) - min_reduced = Bound::createDecrement(min_reduced); - if (Bound::eq(min_true, max_false)) - max_false = Bound::createDecrement(max_false); - if (Bound::eq(max_false, min_reduced)) - min_reduced = NULL; +void ABCD::MemoizedResultChart::addTrue(const Bound &bound) { + if (!min_true || Bound::leq(bound, *min_true)) + min_true.reset(new Bound(bound)); + + if (Bound::eq(min_true.get(), min_reduced.get())) + min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced))); + if (Bound::eq(min_true.get(), max_false.get())) + max_false.reset(new Bound(Bound::createDecrement(*max_false))); + if (Bound::eq(max_false.get(), min_reduced.get())) + min_reduced.reset(); clearRedundantReduced(); } /// Stores a Reduced found -void ABCD::MemoizedResultChart::addReduced(Bound *bound) { - if (!min_reduced || Bound::leq(bound, min_reduced)) - min_reduced = bound; - - if (Bound::eq(min_reduced, min_true)) - min_true = Bound::createIncrement(min_true); - if (Bound::eq(min_reduced, max_false)) - max_false = Bound::createDecrement(max_false); +void ABCD::MemoizedResultChart::addReduced(const Bound &bound) { + if (!min_reduced || Bound::leq(bound, *min_reduced)) + min_reduced.reset(new Bound(bound)); + + if (Bound::eq(min_reduced.get(), min_true.get())) + min_true.reset(new Bound(Bound::createIncrement(*min_true))); + if (Bound::eq(min_reduced.get(), max_false.get())) + max_false.reset(new Bound(Bound::createDecrement(*max_false))); } /// Clears 
redundant reduced @@ -995,14 +992,14 @@ void ABCD::MemoizedResultChart::addReduced(Bound *bound) { /// is unnecessary and then removed. It also works for min_reduced /// begin smaller than max_false. void ABCD::MemoizedResultChart::clearRedundantReduced() { - if (min_true && min_reduced && Bound::lt(min_true, min_reduced)) - min_reduced = NULL; - if (max_false && min_reduced && Bound::lt(min_reduced, max_false)) - min_reduced = NULL; + if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced)) + min_reduced.reset(); + if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false)) + min_reduced.reset(); } /// Stores the bound found -void ABCD::MemoizedResult::updateBound(Value *b, Bound *bound, +void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound, const ProveResult res) { if (res == False) { map[b].addFalse(bound); @@ -1020,19 +1017,17 @@ void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from, assert(cast<IntegerType>(V_from->getType())->getBitWidth() == value.getBitWidth()); - DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator from; - from = addNode(V_from); - from->second.insert(new Edge(V_to, value, upper)); + graph[V_from].push_back(Edge(V_to, value, upper)); } /// Test if there is any edge from V in the upper direction bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const { - SmallPtrSet<Edge *, 16> it = graph.lookup(V); + SmallVector<Edge, 16> it = graph.lookup(V); - SmallPtrSet<Edge *, 16>::iterator begin = it.begin(); - SmallPtrSet<Edge *, 16>::iterator end = it.end(); + SmallVector<Edge, 16>::iterator begin = it.begin(); + SmallVector<Edge, 16>::iterator end = it.end(); for (; begin != end; ++begin) { - if ((*begin)->isUpperBound() == upper) { + if (begin->isUpperBound() == upper) { return true; } } @@ -1049,18 +1044,18 @@ void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const { /// Prints the body of the dot file void ABCD::InequalityGraph::printBody(raw_ostream &OS) const { - 
DenseMap<Value *, SmallPtrSet<Edge *, 16> >::const_iterator begin = + DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin = graph.begin(), end = graph.end(); for (; begin != end ; ++begin) { - SmallPtrSet<Edge *, 16>::iterator begin_par = + SmallVector<Edge, 16>::const_iterator begin_par = begin->second.begin(), end_par = begin->second.end(); Value *source = begin->first; printVertex(OS, source); for (; begin_par != end_par ; ++begin_par) { - Edge *edge = *begin_par; + const Edge &edge = *begin_par; printEdge(OS, source, edge); } } @@ -1079,10 +1074,10 @@ void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const { /// Prints the edge to the dot file void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source, - Edge *edge) const { - Value *dest = edge->getVertex(); - APInt value = edge->getValue(); - bool upper = edge->isUpperBound(); + const Edge &edge) const { + Value *dest = edge.getVertex(); + APInt value = edge.getValue(); + bool upper = edge.isUpperBound(); OS << "\""; printName(OS, source); diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 50c9630..93e9bfb 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -174,7 +174,7 @@ bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, // don't mess around with them. 
BasicBlock::const_iterator BBI = BB->begin(); while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { - for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end(); + for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { const Instruction *User = cast<Instruction>(*UI); if (User->getParent() != DestBB || !isa<PHINode>(User)) @@ -714,8 +714,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, MemoryInst->replaceUsesOfWith(Addr, SunkAddr); - if (Addr->use_empty()) + if (Addr->use_empty()) { RecursivelyDeleteTriviallyDeadInstructions(Addr); + // This address is now available for reassignment, so erase the table entry; + // we don't want to match some completely different instruction. + SunkAddrs[Addr] = 0; + } return true; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index fcb802a..642d59d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1004,18 +1004,18 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, // If the load and store are to the exact same address, they should have been // a must alias. AA must have gotten confused. - // FIXME: Study to see if/when this happens. - if (LoadOffset == StoreOffset) { + // FIXME: Study to see if/when this happens. One case is forwarding a memset + // to a load from the base of the memset. #if 0 + if (LoadOffset == StoreOffset) { dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" << "Base = " << *StoreBase << "\n" << "Store Ptr = " << *WritePtr << "\n" << "Store Offs = " << StoreOffset << "\n" << "Load Ptr = " << *LoadPtr << "\n"; abort(); -#endif - return -1; } +#endif // If the load and store don't overlap at all, the store doesn't provide // anything to the load. 
In this case, they really don't alias at all, AA @@ -1031,11 +1031,11 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, bool isAAFailure = false; - if (StoreOffset < LoadOffset) { + if (StoreOffset < LoadOffset) isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; - } else { + else isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; - } + if (isAAFailure) { #if 0 dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n" diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index eb04d94..988a4cb 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -553,22 +553,26 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { // New instructions were inserted at the end of the preheader. if (isa<PHINode>(I)) break; + // Don't move instructions which might have side effects, since the side - // effects need to complete before instructions inside the loop. Also - // don't move instructions which might read memory, since the loop may - // modify memory. Note that it's okay if the instruction might have - // undefined behavior: LoopSimplify guarantees that the preheader - // dominates the exit block. + // effects need to complete before instructions inside the loop. Also don't + // move instructions which might read memory, since the loop may modify + // memory. Note that it's okay if the instruction might have undefined + // behavior: LoopSimplify guarantees that the preheader dominates the exit + // block. if (I->mayHaveSideEffects() || I->mayReadFromMemory()) continue; + // Skip debug info intrinsics. if (isa<DbgInfoIntrinsic>(I)) continue; + // Don't sink static AllocaInsts out of the entry block, which would // turn them into dynamic allocas! if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) if (AI->isStaticAlloca()) continue; + // Determine if there is a use in or before the loop (direct or // otherwise). 
bool UsedInLoop = false; @@ -585,19 +589,29 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { break; } } + // If there is, the def must remain in the preheader. if (UsedInLoop) continue; + // Otherwise, sink it to the exit block. Instruction *ToMove = I; bool Done = false; - if (I != Preheader->begin()) - --I; - else + + if (I != Preheader->begin()) { + // Skip debug info intrinsics. + do { + --I; + } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); + + if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) + Done = true; + } else { Done = true; + } + ToMove->moveBefore(InsertPt); - if (Done) - break; + if (Done) break; InsertPt = ToMove; } } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index f920dca..625a75d 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1899,7 +1899,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { const Value *V = U->getValue(); if (const Instruction *Inst = dyn_cast<Instruction>(V)) if (L->contains(Inst)) continue; - for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end(); + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const Instruction *UserInst = dyn_cast<Instruction>(*UI); // Ignore non-instructions. @@ -2827,6 +2827,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, IP = Tentative; } while (isa<PHINode>(IP)) ++IP; + while (isa<DbgInfoIntrinsic>(IP)) ++IP; // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. @@ -2864,8 +2865,10 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // so that it is dominated by its operand. If the original insert point // was already dominated by the increment, keep it, because there may // be loop-variant operands that need to be respected also. 
- if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) + if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) { IP = IVIncInsertPos; + while (isa<DbgInfoIntrinsic>(IP)) ++IP; + } break; } Start = AR->getStart(); diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 99e1252..7a6eec3 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -45,7 +45,7 @@ namespace { bool valueEscapes(const Instruction *Inst) const { const BasicBlock *BB = Inst->getParent(); - for (Value::use_const_iterator UI = Inst->use_begin(),E = Inst->use_end(); + for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end(); UI != E; ++UI) if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 7e37938..546b7b6 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1705,28 +1705,31 @@ ModulePass *llvm::createIPSCCPPass() { } -static bool AddressIsTaken(GlobalValue *GV) { +static bool AddressIsTaken(const GlobalValue *GV) { // Delete any dead constantexpr klingons. GV->removeDeadConstantUsers(); - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); - UI != E; ++UI) - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + const User *U = *UI; + if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == GV || SI->isVolatile()) return true; // Storing addr of GV. - } else if (isa<InvokeInst>(*UI) || isa<CallInst>(*UI)) { + } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) { // Make sure we are calling the function, not passing the address. 
- if (UI.getOperandNo() != 0) + ImmutableCallSite CS(cast<Instruction>(U)); + if (!CS.isCallee(UI)) return true; - } else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + } else if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { if (LI->isVolatile()) return true; - } else if (isa<BlockAddress>(*UI)) { + } else if (isa<BlockAddress>(U)) { // blockaddress doesn't take the address of the function, it takes addr // of label. } else { return true; } + } return false; } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 738c5e8..b621e8d 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -79,7 +79,7 @@ static void ChangeToUnreachable(Instruction *I) { /// ChangeToCall - Convert the specified invoke into a normal call. static void ChangeToCall(InvokeInst *II) { BasicBlock *BB = II->getParent(); - SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end()); + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(), Args.end(), "", II); NewCall->takeName(II); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 22f3628..5941ea6 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -350,10 +350,16 @@ struct StrNCmpOpt : public LibCallOptimization { // 'strcpy' Optimizations struct StrCpyOpt : public LibCallOptimization { + bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall. + + StrCpyOpt(bool c) : OptChkCall(c) {} + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Verify the "strcpy" function prototype. + unsigned NumParams = OptChkCall ? 
3 : 2; const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + if (FT->getNumParams() != NumParams || + FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; @@ -371,8 +377,13 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); + if (OptChkCall) + EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), + CI->getOperand(3), B, TD); + else + EmitMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); return Dst; } }; @@ -1162,7 +1173,8 @@ namespace { StringMap<LibCallOptimization*> Optimizations; // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp; - StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen; + StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk; + StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrToOpt StrTo; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations @@ -1177,8 +1189,7 @@ namespace { bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(&ID) {} - + SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {} void InitOptimizations(); bool runOnFunction(Function &F); @@ -1228,6 +1239,9 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["memmove"] = &MemMove; Optimizations["memset"] = &MemSet; + // _chk variants of String and Memory LibCall Optimizations. 
+ Optimizations["__strcpy_chk"] = &StrCpyChk; + // Math Library Optimizations Optimizations["powf"] = &Pow; Optimizations["pow"] = &Pow; diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index be6b383..c70bab5 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -440,8 +440,9 @@ static bool FindAllMemoryUses(Instruction *I, } if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { - if (UI.getOperandNo() == 0) return true; // Storing addr, not into addr. - MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo())); + unsigned opNo = UI.getOperandNo(); + if (opNo == 0) return true; // Storing addr, not into addr. + MemoryUses.push_back(std::make_pair(SI, opNo)); continue; } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 3657390..8c25ad1 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -94,7 +94,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, if (TI->getNumSuccessors() == 1) return false; const BasicBlock *Dest = TI->getSuccessor(SuccNum); - pred_const_iterator I = pred_begin(Dest), E = pred_end(Dest); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); // If there is more than one predecessor, this is a critical edge... assert(I != E && "No preds, but we have an edge to the block?"); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index b44f019..0afccf4 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -108,7 +108,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// EmitMemCpy - Emit a call to the memcpy function to the builder. This always -/// expects that the size has type 'intptr_t' and Dst/Src are pointers. +/// expects that Len has type 'intptr_t' and Dst/Src are pointers. 
Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); @@ -120,10 +120,34 @@ Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, ConstantInt::get(B.getInt32Ty(), Align)); } +/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder. +/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src +/// are pointers. +Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI; + AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", + AttrListPtr::get(&AWI, 1), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), NULL); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize); + if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + /// EmitMemMove - Emit a call to the memmove function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. 
Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B, const TargetData *TD) { + unsigned Align, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); LLVMContext &Context = B.GetInsertBlock()->getContext(); const Type *Ty = TD->getIntPtrType(Context); diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 766c4d9..bbbcc1a 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -226,7 +226,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { bool Changed = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { - std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end()); + std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3); // Insert a normal call instruction... CallInst *NewCall = CallInst::Create(II->getCalledValue(), CallArgs.begin(), CallArgs.end(), "",II); @@ -298,7 +298,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); // Insert a normal call instruction. - std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end()); + std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), CallArgs.begin(), CallArgs.end(), "", II); diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 4f5a70b..f181f3a 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -68,7 +68,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { // assignments to subsections of the memory unit. // Only allow direct and non-volatile loads and stores... 
- for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end(); + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); UI != UE; ++UI) // Loop over all of the uses of the alloca if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { if (LI->isVolatile()) diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index a31235a..292332e 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -14,31 +14,82 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy; -typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > - IncomingPredInfoTy; +/// BBInfo - Per-basic block information used internally by SSAUpdater. +/// The predecessors of each block are cached here since pred_iterator is +/// slow and we need to iterate over the blocks at least a few times. +class SSAUpdater::BBInfo { +public: + Value *AvailableVal; // Value to use in this block. + BasicBlock *DefBB; // Block that defines the available value. + unsigned NumPreds; // Number of predecessor blocks. + BasicBlock **Preds; // Array[NumPreds] of predecessor blocks. + unsigned Counter; // Marker to identify blocks already visited. + PHINode *PHITag; // Marker for existing PHIs that match. + + BBInfo(BasicBlock *BB, Value *V, BumpPtrAllocator *Allocator); +}; +typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy; + +SSAUpdater::BBInfo::BBInfo(BasicBlock *BB, Value *V, + BumpPtrAllocator *Allocator) + : AvailableVal(V), DefBB(0), NumPreds(0), Preds(0), Counter(0), PHITag(0) { + // If this block has a known value, don't bother finding its predecessors. 
+ if (V) { + DefBB = BB; + return; + } + + // We can get our predecessor info by walking the pred_iterator list, but it + // is relatively slow. If we already have PHI nodes in this block, walk one + // of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + NumPreds = SomePhi->getNumIncomingValues(); + Preds = static_cast<BasicBlock**> + (Allocator->Allocate(NumPreds * sizeof(BasicBlock*), + AlignOf<BasicBlock*>::Alignment)); + for (unsigned pi = 0; pi != NumPreds; ++pi) + Preds[pi] = SomePhi->getIncomingBlock(pi); + return; + } + + // Stash the predecessors in a temporary vector until we know how much space + // to allocate for them. + SmallVector<BasicBlock*, 10> TmpPreds; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + TmpPreds.push_back(*PI); + ++NumPreds; + } + Preds = static_cast<BasicBlock**> + (Allocator->Allocate(NumPreds * sizeof(BasicBlock*), + AlignOf<BasicBlock*>::Alignment)); + memcpy(Preds, TmpPreds.data(), NumPreds * sizeof(BasicBlock*)); +} +typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { - return *static_cast<IncomingPredInfoTy*>(IPI); +static BBMapTy *getBBMap(void *BM) { + return static_cast<BBMapTy*>(BM); } +static BumpPtrAllocator *getAllocator(void *BPA) { + return static_cast<BumpPtrAllocator*>(BPA); +} SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {} + : AV(0), PrototypeValue(0), BM(0), BPA(0), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); - delete &getIncomingPredInfo(IPI); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -48,11 +99,6 @@ void SSAUpdater::Initialize(Value *ProtoValue) { AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - - if (IPI == 0) - 
IPI = new IncomingPredInfoTy(); - else - getIncomingPredInfo(IPI).clear(); PrototypeValue = ProtoValue; } @@ -73,7 +119,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { /// IsEquivalentPHI - Check if PHI has the same incoming value as specified /// in ValueMapping for each predecessor block. -static bool IsEquivalentPHI(PHINode *PHI, +static bool IsEquivalentPHI(PHINode *PHI, DenseMap<BasicBlock*, Value*> &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) @@ -89,38 +135,12 @@ static bool IsEquivalentPHI(PHINode *PHI, return true; } -/// GetExistingPHI - Check if BB already contains a phi node that is equivalent -/// to the specified mapping from predecessor blocks to incoming values. -static Value *GetExistingPHI(BasicBlock *BB, - DenseMap<BasicBlock*, Value*> &ValueMapping) { - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast<PHINode>(It)); ++It) { - if (IsEquivalentPHI(SomePHI, ValueMapping)) - return SomePHI; - } - return 0; -} - -/// GetExistingPHI - Check if BB already contains an equivalent phi node. -/// The InputIt type must be an iterator over std::pair<BasicBlock*, Value*> -/// objects that specify the mapping from predecessor blocks to incoming values. -template<typename InputIt> -static Value *GetExistingPHI(BasicBlock *BB, const InputIt &I, - const InputIt &E) { - // Avoid create the mapping if BB has no phi nodes at all. - if (!isa<PHINode>(BB->begin())) - return 0; - DenseMap<BasicBlock*, Value*> ValueMapping(I, E); - return GetExistingPHI(BB, ValueMapping); -} - /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. 
Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { - assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); + assert(BM == 0 && BPA == 0 && "Unexpected Internal State"); Value *Res = GetValueAtEndOfBlockInternal(BB); - assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); + assert(BM == 0 && BPA == 0 && "Unexpected Internal State"); return Res; } @@ -146,7 +166,7 @@ Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. - if (!getAvailableVals(AV).count(BB)) + if (!HasValueForBlock(BB)) return GetValueAtEndOfBlock(BB); // Otherwise, we have the hard case. Get the live-in values for each @@ -193,10 +213,18 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (SingularValue != 0) return SingularValue; - // Otherwise, we do need a PHI. - if (Value *ExistingPHI = GetExistingPHI(BB, PredValues.begin(), - PredValues.end())) - return ExistingPHI; + // Otherwise, we do need a PHI: check to see if we already have one available + // in this block that produces the right value. + if (isa<PHINode>(BB->begin())) { + DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), + PredValues.end()); + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (IsEquivalentPHI(SomePHI, ValueMapping)) + return SomePHI; + } + } // Ok, we have no way out, insert a new one now. PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), @@ -226,7 +254,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { /// which use their value in the corresponding predecessor. 
void SSAUpdater::RewriteUse(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); - + Value *V; if (PHINode *UserPN = dyn_cast<PHINode>(User)) V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); @@ -236,161 +264,264 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } - /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry /// for the specified BB and if so, return it. If not, construct SSA form by -/// walking predecessors inserting PHI nodes as needed until we get to a block -/// where the value is available. -/// +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (Value *V = AvailableVals[BB]) + return V; + + // Pool allocation used internally by GetValueAtEndOfBlock. + BumpPtrAllocator AllocatorObj; + BBMapTy BBMapObj; + BPA = &AllocatorObj; + BM = &BBMapObj; + + BBInfo *Info = new (AllocatorObj) BBInfo(BB, 0, &AllocatorObj); + BBMapObj[BB] = Info; + + bool Changed; + unsigned Counter = 1; + do { + Changed = false; + FindPHIPlacement(BB, Info, Changed, Counter); + ++Counter; + } while (Changed); + + FindAvailableVal(BB, Info, Counter); + + BPA = 0; + BM = 0; + return Info->AvailableVal; +} - // Query AvailableVals by doing an insertion of null. - std::pair<AvailableValsTy::iterator, bool> InsertRes = - AvailableVals.insert(std::make_pair(BB, TrackingVH<Value>())); - - // Handle the case when the insertion fails because we have already seen BB. - if (!InsertRes.second) { - // If the insertion failed, there are two cases. The first case is that the - // value is already available for the specified block. If we get this, just - // return the value. - if (InsertRes.first->second != 0) - return InsertRes.first->second; - - // Otherwise, if the value we find is null, then this is the value is not - // known but it is being computed elsewhere in our recursion. 
This means - // that we have a cycle. Handle this by inserting a PHI node and returning - // it. When we get back to the first instance of the recursion we will fill - // in the PHI node. - return InsertRes.first->second = - PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), - &BB->front()); +/// FindPHIPlacement - Recursively visit the predecessors of a block to find +/// the reaching definition for each predecessor and then determine whether +/// a PHI is needed in this block. +void SSAUpdater::FindPHIPlacement(BasicBlock *BB, BBInfo *Info, bool &Changed, + unsigned Counter) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + BBMapTy *BBMap = getBBMap(BM); + BumpPtrAllocator *Allocator = getAllocator(BPA); + bool BBNeedsPHI = false; + BasicBlock *SamePredDefBB = 0; + + // If there are no predecessors, then we must have found an unreachable + // block. Treat it as a definition with 'undef'. + if (Info->NumPreds == 0) { + Info->AvailableVal = UndefValue::get(PrototypeValue->getType()); + Info->DefBB = BB; + return; } - // Okay, the value isn't in the map and we just inserted a null in the entry - // to indicate that we're processing the block. Since we have no idea what - // value is in this block, we have to recurse through our predecessors. - // - // While we're walking our predecessors, we keep track of them in a vector, - // then insert a PHI node in the end if we actually need one. We could use a - // smallvector here, but that would take a lot of stack space for every level - // of the recursion, just use IncomingPredInfo as an explicit stack. - IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); - unsigned FirstPredInfoEntry = IncomingPredInfo.size(); - - // As we're walking the predecessors, keep track of whether they are all - // producing the same value. If so, this value will capture it, if not, it - // will get reset to null. We distinguish the no-predecessor case explicitly - // below. 
- TrackingVH<Value> ExistingValue; - - // We can get our predecessor info by walking the pred_iterator list, but it - // is relatively slow. If we already have PHI nodes in this block, walk one - // of them to get the predecessor list instead. - if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { - BasicBlock *PredBB = SomePhi->getIncomingBlock(i); - Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - - // Set ExistingValue to singular value from all predecessors so far. - if (i == 0) - ExistingValue = PredVal; - else if (PredVal != ExistingValue) - ExistingValue = 0; + Info->Counter = Counter; + for (unsigned pi = 0; pi != Info->NumPreds; ++pi) { + BasicBlock *Pred = Info->Preds[pi]; + BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); + if (!BBMapBucket.second) { + Value *PredVal = AvailableVals.lookup(Pred); + BBMapBucket.second = new (*Allocator) BBInfo(Pred, PredVal, Allocator); } - } else { - bool isFirstPred = true; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *PredBB = *PI; - Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - - // Set ExistingValue to singular value from all predecessors so far. - if (isFirstPred) { - ExistingValue = PredVal; - isFirstPred = false; - } else if (PredVal != ExistingValue) - ExistingValue = 0; + BBInfo *PredInfo = BBMapBucket.second; + BasicBlock *DefBB = 0; + if (!PredInfo->AvailableVal) { + if (PredInfo->Counter != Counter) + FindPHIPlacement(Pred, PredInfo, Changed, Counter); + + // Ignore back edges where the value is not yet known. 
+ if (!PredInfo->DefBB) + continue; } + DefBB = PredInfo->DefBB; + + if (!SamePredDefBB) + SamePredDefBB = DefBB; + else if (DefBB != SamePredDefBB) + BBNeedsPHI = true; } - // If there are no predecessors, then we must have found an unreachable block - // just return 'undef'. Since there are no predecessors, InsertRes must not - // be invalidated. - if (IncomingPredInfo.size() == FirstPredInfoEntry) - return InsertRes.first->second = UndefValue::get(PrototypeValue->getType()); - - /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If - /// this block is involved in a loop, a no-entry PHI node will have been - /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted - /// above. - TrackingVH<Value> &InsertedVal = AvailableVals[BB]; - - // If the predecessor values are not all the same, then check to see if there - // is an existing PHI that can be used. - if (!ExistingValue) - ExistingValue = GetExistingPHI(BB, - IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - - // If there is an existing value we can use, then we don't need to insert a - // PHI. This is the simple and common case. - if (ExistingValue) { - // If a PHI node got inserted, replace it with the existing value and delete - // it. - if (InsertedVal) { - PHINode *OldVal = cast<PHINode>(InsertedVal); - // Be careful about dead loops. These RAUW's also update InsertedVal. - if (InsertedVal != ExistingValue) - OldVal->replaceAllUsesWith(ExistingValue); - else - OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); - OldVal->eraseFromParent(); - } else { - InsertedVal = ExistingValue; - } + BasicBlock *NewDefBB = (BBNeedsPHI ? BB : SamePredDefBB); + if (Info->DefBB != NewDefBB) { + Changed = true; + Info->DefBB = NewDefBB; + } +} - // Either path through the 'if' should have set InsertedVal -> ExistingVal. 
- assert((InsertedVal == ExistingValue || isa<UndefValue>(InsertedVal)) && - "RAUW didn't change InsertedVal to be ExistingValue"); +/// FindAvailableVal - If this block requires a PHI, first check if an existing +/// PHI matches the PHI placement and reaching definitions computed earlier, +/// and if not, create a new PHI. Visit all the block's predecessors to +/// calculate the available value for each one and fill in the incoming values +/// for a new PHI. +void SSAUpdater::FindAvailableVal(BasicBlock *BB, BBInfo *Info, + unsigned Counter) { + if (Info->AvailableVal || Info->Counter == Counter) + return; - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - return ExistingValue; + AvailableValsTy &AvailableVals = getAvailableVals(AV); + BBMapTy *BBMap = getBBMap(BM); + + // Check if there needs to be a PHI in BB. + PHINode *NewPHI = 0; + if (Info->DefBB == BB) { + // Look for an existing PHI. + FindExistingPHI(BB); + if (!Info->AvailableVal) { + NewPHI = PHINode::Create(PrototypeValue->getType(), + PrototypeValue->getName(), &BB->front()); + NewPHI->reserveOperandSpace(Info->NumPreds); + Info->AvailableVal = NewPHI; + AvailableVals[BB] = NewPHI; + } } - // Otherwise, we do need a PHI: insert one now if we don't already have one. - if (InsertedVal == 0) - InsertedVal = PHINode::Create(PrototypeValue->getType(), - PrototypeValue->getName(), &BB->front()); + // Iterate through the block's predecessors. + Info->Counter = Counter; + for (unsigned pi = 0; pi != Info->NumPreds; ++pi) { + BasicBlock *Pred = Info->Preds[pi]; + BBInfo *PredInfo = (*BBMap)[Pred]; + FindAvailableVal(Pred, PredInfo, Counter); + if (NewPHI) { + // Skip to the nearest preceding definition. 
+ if (PredInfo->DefBB != Pred) + PredInfo = (*BBMap)[PredInfo->DefBB]; + NewPHI->addIncoming(PredInfo->AvailableVal, Pred); + } else if (!Info->AvailableVal) + Info->AvailableVal = PredInfo->AvailableVal; + } - PHINode *InsertedPHI = cast<PHINode>(InsertedVal); - InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry); + if (NewPHI) { + DEBUG(dbgs() << " Inserted PHI: " << *NewPHI << "\n"); - // Fill in all the predecessors of the PHI. - for (IncomingPredInfoTy::iterator I = - IncomingPredInfo.begin()+FirstPredInfoEntry, - E = IncomingPredInfo.end(); I != E; ++I) - InsertedPHI->addIncoming(I->second, I->first); + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(NewPHI); + } +} - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); +/// FindExistingPHI - Look through the PHI nodes in a block to see if any of +/// them match what is needed. +void SSAUpdater::FindExistingPHI(BasicBlock *BB) { + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (CheckIfPHIMatches(SomePHI)) { + RecordMatchingPHI(SomePHI); + break; + } + ClearPHITags(SomePHI); + } +} - // See if the PHI node can be merged to a single value. This can happen in - // loop cases when we get a PHI of itself and one other value. - if (Value *ConstVal = InsertedPHI->hasConstantValue()) { - InsertedPHI->replaceAllUsesWith(ConstVal); - InsertedPHI->eraseFromParent(); - InsertedVal = ConstVal; - } else { - DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); +/// CheckIfPHIMatches - Check if a PHI node matches the placement and values +/// in the BBMap. 
+bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) { + BBMapTy *BBMap = getBBMap(BM); + SmallVector<PHINode*, 20> WorkList; + WorkList.push_back(PHI); + + // Mark that the block containing this PHI has been visited. + (*BBMap)[PHI->getParent()]->PHITag = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + Value *IncomingVal = PHI->getIncomingValue(i); + BasicBlock *Pred = PHI->getIncomingBlock(i); + BBInfo *PredInfo = (*BBMap)[Pred]; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != Pred) { + Pred = PredInfo->DefBB; + PredInfo = (*BBMap)[Pred]; + } + + // Check if it matches the expected value. + if (PredInfo->AvailableVal) { + if (IncomingVal == PredInfo->AvailableVal) + continue; + return false; + } + + // Check if the value is a PHI in the correct block. + PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal); + if (!IncomingPHIVal || IncomingPHIVal->getParent() != Pred) + return false; + + // If this block has already been visited, check if this PHI matches. + if (PredInfo->PHITag) { + if (IncomingPHIVal == PredInfo->PHITag) + continue; + return false; + } + PredInfo->PHITag = IncomingPHIVal; + + WorkList.push_back(IncomingPHIVal); + } + } + return true; +} - // If the client wants to know about all new instructions, tell it. - if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); +/// RecordMatchingPHI - For a PHI node that matches, record it and its input +/// PHIs in both the BBMap and the AvailableVals mapping. +void SSAUpdater::RecordMatchingPHI(PHINode *PHI) { + BBMapTy *BBMap = getBBMap(BM); + AvailableValsTy &AvailableVals = getAvailableVals(AV); + SmallVector<PHINode*, 20> WorkList; + WorkList.push_back(PHI); + + // Record this PHI. 
+ BasicBlock *BB = PHI->getParent(); + AvailableVals[BB] = PHI; + (*BBMap)[BB]->AvailableVal = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); + if (!IncomingPHIVal) continue; + BB = IncomingPHIVal->getParent(); + BBInfo *Info = (*BBMap)[BB]; + if (!Info || Info->AvailableVal) + continue; + + // Record the PHI and add it to the worklist. + AvailableVals[BB] = IncomingPHIVal; + Info->AvailableVal = IncomingPHIVal; + WorkList.push_back(IncomingPHIVal); + } } +} - return InsertedVal; +/// ClearPHITags - When one of the existing PHI nodes fails to match, clear +/// the PHITag values that were stored in the BBMap when checking to see if +/// it matched. +void SSAUpdater::ClearPHITags(PHINode *PHI) { + BBMapTy *BBMap = getBBMap(BM); + SmallVector<PHINode*, 20> WorkList; + WorkList.push_back(PHI); + + // Clear the tag for this PHI. + (*BBMap)[PHI->getParent()]->PHITag = 0; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); + if (!IncomingPHIVal) continue; + BasicBlock *BB = IncomingPHIVal->getParent(); + BBInfo *Info = (*BBMap)[BB]; + if (!Info || Info->AvailableVal || !Info->PHITag) + continue; + + // Clear the tag and add the PHI to the worklist. 
+ Info->PHITag = 0; + WorkList.push_back(IncomingPHIVal); + } + } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 2ce5bdc..9f2209d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -224,7 +224,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (BI->isUnconditional() && BI->getSuccessor(0) == BB) { if (!AggressiveInsts) return false; // Okay, it looks like the instruction IS in the "condition". Check to - // see if its a cheap instruction to unconditionally compute, and if it + // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. if (!I->isSafeToSpeculativelyExecute()) return false; @@ -1768,7 +1768,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { Pred->getInstList().remove(II); // Take out of symbol table // Insert the call now. - SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end()); + SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3); CallInst *CI = CallInst::Create(II->getCalledValue(), Args.begin(), Args.end(), II->getName(), BI); @@ -1970,13 +1970,13 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { II->removeFromParent(); // Take out of symbol table // Insert the call now... - SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end()); + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); CallInst *CI = CallInst::Create(II->getCalledValue(), Args.begin(), Args.end(), II->getName(), BI); CI->setCallingConv(II->getCallingConv()); CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the Call does now instead. + // If the invoke produced a value, the call does now instead. 
II->replaceAllUsesWith(CI); delete II; Changed = true; diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 0eb9f02..f6a6076 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -1681,7 +1681,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { // Output predecessors for the block... Out.PadToColumn(50); Out << ";"; - pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB); + const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); if (PI == PE) { Out << " No predecessors!"; @@ -1875,6 +1875,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { if (PAL.getFnAttributes() != Attribute::None) Out << ' ' << Attribute::getAsString(PAL.getFnAttributes()); } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { + Operand = II->getCalledValue(); const PointerType *PTy = cast<PointerType>(Operand->getType()); const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); const Type *RetTy = FTy->getReturnType(); @@ -1912,10 +1913,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(Operand, true); } Out << '('; - for (unsigned op = 3, Eop = I.getNumOperands(); op < Eop; ++op) { - if (op > 3) + for (unsigned op = 0, Eop = I.getNumOperands() - 3; op < Eop; ++op) { + if (op) Out << ", "; - writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op-2)); + writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op + 1)); } Out << ')'; diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 5e4c9fb..b9aa5c3 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -225,7 +225,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // Calls to these intrinsics are transformed into ShuffleVector's. NewFn = 0; return true; + } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) { + // Calls to these intrinsics are transformed into vector multiplies. 
+ NewFn = 0; + return true; } + break; } @@ -355,6 +360,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Clean up the old call now that it has been completely upgraded. CI->eraseFromParent(); + } else if (F->getName() == "llvm.x86.sse41.pmulld") { + // Upgrade this set of intrinsics into vector multiplies. + Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1), + CI->getOperand(2), + CI->getName(), + CI); + // Fix up all the uses with our new multiply. + if (!CI->use_empty()) + CI->replaceAllUsesWith(Mul); + + // Remove upgraded multiply. + CI->eraseFromParent(); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 4b80e36..c64564b 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMCore ConstantFold.cpp Constants.cpp Core.cpp + DebugLoc.cpp Dominators.cpp Function.cpp GVMaterializer.cpp @@ -16,6 +17,7 @@ add_llvm_library(LLVMCore Instructions.cpp IntrinsicInst.cpp LLVMContext.cpp + LLVMContextImpl.cpp LeakDetector.cpp Metadata.cpp Module.cpp diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 10f8879..1553bd5 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -59,6 +59,7 @@ Constant *Constant::getNullValue(const Type *Ty) { case Type::PointerTyID: return ConstantPointerNull::get(cast<PointerType>(Ty)); case Type::StructTyID: + case Type::UnionTyID: case Type::ArrayTyID: case Type::VectorTyID: return ConstantAggregateZero::get(Ty); @@ -160,7 +161,7 @@ bool Constant::canTrap() const { /// isConstantUsed - Return true if the constant has users other than constant /// exprs and other dangling things. 
bool Constant::isConstantUsed() const { - for (use_const_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { + for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { const Constant *UC = dyn_cast<Constant>(*UI); if (UC == 0 || isa<GlobalValue>(UC)) return true; @@ -944,7 +945,8 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) { // Factory Function Implementation ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) { - assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) && + assert((Ty->isStructTy() || Ty->isUnionTy() + || Ty->isArrayTy() || Ty->isVectorTy()) && "Cannot create an aggregate zero of non-aggregate type!"); LLVMContextImpl *pImpl = Ty->getContext().pImpl; @@ -1945,6 +1947,20 @@ const char *ConstantExpr::getOpcodeName() const { return Instruction::getOpcodeName(getOpcode()); } + + +GetElementPtrConstantExpr:: +GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList, + const Type *DestTy) + : ConstantExpr(DestTy, Instruction::GetElementPtr, + OperandTraits<GetElementPtrConstantExpr>::op_end(this) + - (IdxList.size()+1), IdxList.size()+1) { + OperandList[0] = C; + for (unsigned i = 0, E = IdxList.size(); i != E; ++i) + OperandList[i+1] = IdxList[i]; +} + + //===----------------------------------------------------------------------===// // replaceUsesOfWithOnConstant implementations diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index c798ba2..2f2fac5 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -15,6 +15,7 @@ #ifndef LLVM_CONSTANTSCONTEXT_H #define LLVM_CONSTANTSCONTEXT_H +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/Operator.h" #include "llvm/Support/Debug.h" @@ -327,6 +328,39 @@ struct ExprMapKeyType { } }; +struct InlineAsmKeyType { + InlineAsmKeyType(StringRef AsmString, + StringRef Constraints, bool hasSideEffects, + bool isAlignStack) + : 
asm_string(AsmString), constraints(Constraints), + has_side_effects(hasSideEffects), is_align_stack(isAlignStack) {} + std::string asm_string; + std::string constraints; + bool has_side_effects; + bool is_align_stack; + bool operator==(const InlineAsmKeyType& that) const { + return this->asm_string == that.asm_string && + this->constraints == that.constraints && + this->has_side_effects == that.has_side_effects && + this->is_align_stack == that.is_align_stack; + } + bool operator<(const InlineAsmKeyType& that) const { + if (this->asm_string != that.asm_string) + return this->asm_string < that.asm_string; + if (this->constraints != that.constraints) + return this->constraints < that.constraints; + if (this->has_side_effects != that.has_side_effects) + return this->has_side_effects < that.has_side_effects; + if (this->is_align_stack != that.is_align_stack) + return this->is_align_stack < that.is_align_stack; + return false; + } + + bool operator!=(const InlineAsmKeyType& that) const { + return !(*this == that); + } +}; + // The number of operands for each ConstantCreator::create method is // determined by the ConstantTraits template. 
// ConstantCreator - A class that is used to create constants by @@ -517,6 +551,23 @@ struct ConstantKeyData<UndefValue> { } }; +template<> +struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> { + static InlineAsm *create(const PointerType *Ty, const InlineAsmKeyType &Key) { + return new InlineAsm(Ty, Key.asm_string, Key.constraints, + Key.has_side_effects, Key.is_align_stack); + } +}; + +template<> +struct ConstantKeyData<InlineAsm> { + typedef InlineAsmKeyType ValType; + static ValType getValType(InlineAsm *Asm) { + return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(), + Asm->hasSideEffects(), Asm->isAlignStack()); + } +}; + template<class ValType, class TypeClass, class ConstantClass, bool HasLargeKey = false /*true for arrays and structs*/ > class ConstantUniqueMap : public AbstractTypeUser { @@ -549,8 +600,8 @@ public: void freeConstants() { for (typename MapTy::iterator I=Map.begin(), E=Map.end(); I != E; ++I) { - if (I->second->use_empty()) - delete I->second; + // Asserts that use_empty(). + delete I->second; } } diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index f4f65c5..44d487a 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1651,7 +1651,7 @@ LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) { } void LLVMClearInsertionPosition(LLVMBuilderRef Builder) { - unwrap(Builder)->ClearInsertionPoint (); + unwrap(Builder)->ClearInsertionPoint(); } void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) { @@ -1670,11 +1670,13 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) { /*--.. Metadata builders ...................................................--*/ void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) { - unwrap(Builder)->SetCurrentDebugLocation(L? unwrap<MDNode>(L) : NULL); + MDNode *Loc = L ? 
unwrap<MDNode>(L) : NULL; + unwrap(Builder)->SetCurrentDebugLocation(NewDebugLoc::getFromDILocation(Loc)); } LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) { - return wrap(unwrap(Builder)->getCurrentDebugLocation()); + return wrap(unwrap(Builder)->getCurrentDebugLocation() + .getAsMDNode(unwrap(Builder)->getContext())); } void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) { diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp new file mode 100644 index 0000000..f02ce57 --- /dev/null +++ b/lib/VMCore/DebugLoc.cpp @@ -0,0 +1,288 @@ +//===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/DebugLoc.h" +#include "LLVMContextImpl.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DebugLoc Implementation +//===----------------------------------------------------------------------===// + +MDNode *NewDebugLoc::getScope(const LLVMContext &Ctx) const { + if (ScopeIdx == 0) return 0; + + if (ScopeIdx > 0) { + // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at + // position specified. + assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() && + "Invalid ScopeIdx!"); + return Ctx.pImpl->ScopeRecords[ScopeIdx-1].get(); + } + + // Otherwise, the index is in the ScopeInlinedAtRecords array. + assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() && + "Invalid ScopeIdx"); + return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get(); +} + +MDNode *NewDebugLoc::getInlinedAt(const LLVMContext &Ctx) const { + // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at + // position specified. Zero is invalid. 
+ if (ScopeIdx >= 0) return 0; + + // Otherwise, the index is in the ScopeInlinedAtRecords array. + assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() && + "Invalid ScopeIdx"); + return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get(); +} + +/// Return both the Scope and the InlinedAt values. +void NewDebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA, + const LLVMContext &Ctx) const { + if (ScopeIdx == 0) { + Scope = IA = 0; + return; + } + + if (ScopeIdx > 0) { + // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at + // position specified. + assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() && + "Invalid ScopeIdx!"); + Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get(); + IA = 0; + return; + } + + // Otherwise, the index is in the ScopeInlinedAtRecords array. + assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() && + "Invalid ScopeIdx"); + Scope = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get(); + IA = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get(); +} + + +NewDebugLoc NewDebugLoc::get(unsigned Line, unsigned Col, + MDNode *Scope, MDNode *InlinedAt) { + NewDebugLoc Result; + + // If no scope is available, this is an unknown location. + if (Scope == 0) return Result; + + // Saturate line and col to "unknown". + if (Col > 255) Col = 0; + if (Line >= (1 << 24)) Line = 0; + Result.LineCol = Line | (Col << 24); + + LLVMContext &Ctx = Scope->getContext(); + + // If there is no inlined-at location, use the ScopeRecords array. + if (InlinedAt == 0) + Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0); + else + Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope, + InlinedAt, 0); + + return Result; +} + +/// getAsMDNode - This method converts the compressed DebugLoc node into a +/// DILocation compatible MDNode. 
+MDNode *NewDebugLoc::getAsMDNode(const LLVMContext &Ctx) const { + if (isUnknown()) return 0; + + MDNode *Scope, *IA; + getScopeAndInlinedAt(Scope, IA, Ctx); + assert(Scope && "If scope is null, this should be isUnknown()"); + + LLVMContext &Ctx2 = Scope->getContext(); + const Type *Int32 = Type::getInt32Ty(Ctx2); + Value *Elts[] = { + ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()), + Scope, IA + }; + return MDNode::get(Ctx2, &Elts[0], 4); +} + +/// getFromDILocation - Translate the DILocation quad into a NewDebugLoc. +NewDebugLoc NewDebugLoc::getFromDILocation(MDNode *N) { + if (N == 0 || N->getNumOperands() != 4) return NewDebugLoc(); + + MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(2)); + if (Scope == 0) return NewDebugLoc(); + + unsigned LineNo = 0, ColNo = 0; + if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(0))) + LineNo = Line->getZExtValue(); + if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(1))) + ColNo = Col->getZExtValue(); + + return get(LineNo, ColNo, Scope, dyn_cast_or_null<MDNode>(N->getOperand(3))); +} + +//===----------------------------------------------------------------------===// +// LLVMContextImpl Implementation +//===----------------------------------------------------------------------===// + +int LLVMContextImpl::getOrAddScopeRecordIdxEntry(MDNode *Scope, + int ExistingIdx) { + // If we already have an entry for this scope, return it. + int &Idx = ScopeRecordIdx[Scope]; + if (Idx) return Idx; + + // If we don't have an entry, but ExistingIdx is specified, use it. + if (ExistingIdx) + return Idx = ExistingIdx; + + // Otherwise add a new entry. + + // Start out ScopeRecords with a minimal reasonable size to avoid + // excessive reallocation starting out. + if (ScopeRecords.empty()) + ScopeRecords.reserve(128); + + // Index is biased by 1 for index. 
+ Idx = ScopeRecords.size()+1; + ScopeRecords.push_back(DebugRecVH(Scope, this, Idx)); + return Idx; +} + +int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA, + int ExistingIdx) { + // If we already have an entry, return it. + int &Idx = ScopeInlinedAtIdx[std::make_pair(Scope, IA)]; + if (Idx) return Idx; + + // If we don't have an entry, but ExistingIdx is specified, use it. + if (ExistingIdx) + return Idx = ExistingIdx; + + // Start out ScopeInlinedAtRecords with a minimal reasonable size to avoid + // excessive reallocation starting out. + if (ScopeInlinedAtRecords.empty()) + ScopeInlinedAtRecords.reserve(128); + + // Index is biased by 1 and negated. + Idx = -ScopeInlinedAtRecords.size()-1; + ScopeInlinedAtRecords.push_back(std::make_pair(DebugRecVH(Scope, this, Idx), + DebugRecVH(IA, this, Idx))); + return Idx; +} + + +//===----------------------------------------------------------------------===// +// DebugRecVH Implementation +//===----------------------------------------------------------------------===// + +/// deleted - The MDNode this is pointing to got deleted, so this pointer needs +/// to drop to null and we need remove our entry from the DenseMap. +void DebugRecVH::deleted() { + // If this is a non-canonical reference, just drop the value to null, we know + // it doesn't have a map entry. + if (Idx == 0) { + setValPtr(0); + return; + } + + MDNode *Cur = get(); + + // If the index is positive, it is an entry in ScopeRecords. + if (Idx > 0) { + assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!"); + Ctx->ScopeRecordIdx.erase(Cur); + // Reset this VH to null and we're done. + setValPtr(0); + Idx = 0; + return; + } + + // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it + // is the scope or the inlined-at record entry. 
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size()); + std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1]; + assert((this == &Entry.first || this == &Entry.second) && + "Mapping out of date!"); + + MDNode *OldScope = Entry.first.get(); + MDNode *OldInlinedAt = Entry.second.get(); + assert(OldScope != 0 && OldInlinedAt != 0 && + "Entry should be non-canonical if either val dropped to null"); + + // Otherwise, we do have an entry in it, nuke it and we're done. + assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&& + "Mapping out of date"); + Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt)); + + // Reset this VH to null. Drop both 'Idx' values to null to indicate that + // we're in non-canonical form now. + setValPtr(0); + Entry.first.Idx = Entry.second.Idx = 0; +} + +void DebugRecVH::allUsesReplacedWith(Value *NewVa) { + // If being replaced with a non-mdnode value (e.g. undef) handle this as if + // the mdnode got deleted. + MDNode *NewVal = dyn_cast<MDNode>(NewVa); + if (NewVal == 0) return deleted(); + + // If this is a non-canonical reference, just change it, we know it already + // doesn't have a map entry. + if (Idx == 0) { + setValPtr(NewVa); + return; + } + + MDNode *OldVal = get(); + assert(OldVal != NewVa && "Node replaced with self?"); + + // If the index is positive, it is an entry in ScopeRecords. + if (Idx > 0) { + assert(Ctx->ScopeRecordIdx[OldVal] == Idx && "Mapping out of date!"); + Ctx->ScopeRecordIdx.erase(OldVal); + setValPtr(NewVal); + + int NewEntry = Ctx->getOrAddScopeRecordIdxEntry(NewVal, Idx); + + // If NewVal already has an entry, this becomes a non-canonical reference, + // just drop Idx to 0 to signify this. + if (NewEntry != Idx) + Idx = 0; + return; + } + + // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it + // is the scope or the inlined-at record entry. 
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size()); + std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1]; + assert((this == &Entry.first || this == &Entry.second) && + "Mapping out of date!"); + + MDNode *OldScope = Entry.first.get(); + MDNode *OldInlinedAt = Entry.second.get(); + assert(OldScope != 0 && OldInlinedAt != 0 && + "Entry should be non-canonical if either val dropped to null"); + + // Otherwise, we do have an entry in it, nuke it and we're done. + assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx&& + "Mapping out of date"); + Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt)); + + // Reset this VH to the new value. + setValPtr(NewVal); + + int NewIdx = Ctx->getOrAddScopeInlinedAtIdxEntry(Entry.first.get(), + Entry.second.get(), Idx); + // If NewVal already has an entry, this becomes a non-canonical reference, + // just drop Idx to 0 to signify this. + if (NewIdx != Idx) { + std::pair<DebugRecVH, DebugRecVH> &Entry=Ctx->ScopeInlinedAtRecords[-Idx-1]; + Entry.first.Idx = Entry.second.Idx = 0; + } +} diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index dbc283e..8f94efc 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -16,6 +16,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/StringPool.h" @@ -400,13 +401,16 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys, #include "llvm/Intrinsics.gen" #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN - /// hasAddressTaken - returns true if there are any uses of this function - /// other than direct calls or invokes to it. 
-bool Function::hasAddressTaken() const { - for (Value::use_const_iterator I = use_begin(), E = use_end(); I != E; ++I) { - if (I.getOperandNo() != 0 || - (!isa<CallInst>(*I) && !isa<InvokeInst>(*I))) - return true; +/// hasAddressTaken - returns true if there are any uses of this function +/// other than direct calls or invokes to it. +bool Function::hasAddressTaken(const User* *PutOffender) const { + for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { + const User *U = *I; + if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) + return PutOffender ? (*PutOffender = U, true) : true; + ImmutableCallSite CS(cast<Instruction>(U)); + if (!CS.isCallee(I)) + return PutOffender ? (*PutOffender = U, true) : true; } return false; } diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index 489ec65..b758eb8 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -61,8 +61,8 @@ void GlobalValue::Dematerialize() { /// that want to check to see if a global is unused, but don't want to deal /// with potentially dead constants hanging off of the globals. void GlobalValue::removeDeadConstantUsers() const { - Value::use_const_iterator I = use_begin(), E = use_end(); - Value::use_const_iterator LastNonDeadUser = E; + Value::const_use_iterator I = use_begin(), E = use_end(); + Value::const_use_iterator LastNonDeadUser = E; while (I != E) { if (const Constant *User = dyn_cast<Constant>(*I)) { if (!removeDeadUsersOfConstant(User)) { diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 9f2786e..c1b783c 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -32,19 +32,6 @@ Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) { return GV; } -/// SetCurrentDebugLocation - Set location information used by debugging -/// information. 
-void IRBuilderBase::SetCurrentDebugLocation(MDNode *L) { - if (DbgMDKind == 0) - DbgMDKind = Context.getMDKindID("dbg"); - CurDbgLocation = L; -} - -void IRBuilderBase::SetInstDebugLocation(Instruction *I) const { - if (CurDbgLocation) - I->setMetadata(DbgMDKind, CurDbgLocation); -} - const Type *IRBuilderBase::getCurrentFunctionReturnType() const { assert(BB && BB->getParent() && "No current function!"); return BB->getParent()->getReturnType(); diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index 6355834..0d2eca9 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/InlineAsm.h" +#include "ConstantsContext.h" +#include "LLVMContextImpl.h" #include "llvm/DerivedTypes.h" #include <algorithm> #include <cctype> @@ -23,28 +25,29 @@ InlineAsm::~InlineAsm() { } -// NOTE: when memoizing the function type, we have to be careful to handle the -// case when the type gets refined. - InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString, StringRef Constraints, bool hasSideEffects, bool isAlignStack) { - // FIXME: memoize! 
- return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects, - isAlignStack); + InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack); + LLVMContextImpl *pImpl = Ty->getContext().pImpl; + return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key); } -InlineAsm::InlineAsm(const FunctionType *Ty, StringRef asmString, - StringRef constraints, bool hasSideEffects, +InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString, + const std::string &constraints, bool hasSideEffects, bool isAlignStack) - : Value(PointerType::getUnqual(Ty), - Value::InlineAsmVal), + : Value(Ty, Value::InlineAsmVal), AsmString(asmString), Constraints(constraints), HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack) { // Do various checks on the constraint string and type. - assert(Verify(Ty, constraints) && "Function type not legal for constraints!"); + assert(Verify(getFunctionType(), constraints) && + "Function type not legal for constraints!"); +} + +void InlineAsm::destroyConstant() { + delete this; } const FunctionType *InlineAsm::getFunctionType() const { diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 3fabfd0..a37fe07 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -283,7 +283,7 @@ bool Instruction::isSameOperationAs(const Instruction *I) const { /// specified block. Note that PHI nodes are considered to evaluate their /// operands in the corresponding predecessor block. bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const { - for (use_const_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { + for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { // PHI nodes uses values in the corresponding predecessor block. For other // instructions, just check to see whether the parent of the use matches up. 
const PHINode *PN = dyn_cast<PHINode>(*UI); diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 8f4763f..4609a64 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -31,23 +31,18 @@ using namespace llvm; //===----------------------------------------------------------------------===// #define CALLSITE_DELEGATE_GETTER(METHOD) \ - Instruction *II(getInstruction()); \ + Instruction *II = getInstruction(); \ return isCall() \ ? cast<CallInst>(II)->METHOD \ : cast<InvokeInst>(II)->METHOD #define CALLSITE_DELEGATE_SETTER(METHOD) \ - Instruction *II(getInstruction()); \ + Instruction *II = getInstruction(); \ if (isCall()) \ cast<CallInst>(II)->METHOD; \ else \ cast<InvokeInst>(II)->METHOD -CallSite::CallSite(Instruction *C) { - assert((isa<CallInst>(C) || isa<InvokeInst>(C)) && "Not a call!"); - I.setPointer(C); - I.setInt(isa<CallInst>(C)); -} CallingConv::ID CallSite::getCallingConv() const { CALLSITE_DELEGATE_GETTER(getCallingConv()); } @@ -66,6 +61,17 @@ bool CallSite::paramHasAttr(uint16_t i, Attributes attr) const { uint16_t CallSite::getParamAlignment(uint16_t i) const { CALLSITE_DELEGATE_GETTER(getParamAlignment(i)); } + +/// @brief Return true if the call should not be inlined. +bool CallSite::isNoInline() const { + CALLSITE_DELEGATE_GETTER(isNoInline()); +} + +void CallSite::setIsNoInline(bool Value) { + CALLSITE_DELEGATE_GETTER(setIsNoInline(Value)); +} + + bool CallSite::doesNotAccessMemory() const { CALLSITE_DELEGATE_GETTER(doesNotAccessMemory()); } @@ -98,6 +104,13 @@ bool CallSite::hasArgument(const Value *Arg) const { return false; } +User::op_iterator CallSite::getCallee() const { + Instruction *II(getInstruction()); + return isCall() + ? 
cast<CallInst>(II)->op_begin() + : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function +} + #undef CALLSITE_DELEGATE_GETTER #undef CALLSITE_DELEGATE_SETTER @@ -611,24 +624,24 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) { void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException, Value* const *Args, unsigned NumArgs) { assert(NumOperands == 3+NumArgs && "NumOperands not set up?"); - Use *OL = OperandList; - OL[0] = Fn; - OL[1] = IfNormal; - OL[2] = IfException; + Op<-3>() = Fn; + Op<-2>() = IfNormal; + Op<-1>() = IfException; const FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()); FTy = FTy; // silence warning. assert(((NumArgs == FTy->getNumParams()) || (FTy->isVarArg() && NumArgs > FTy->getNumParams())) && - "Calling a function with bad signature"); + "Invoking a function with bad signature"); + Use *OL = OperandList; for (unsigned i = 0, e = NumArgs; i != e; i++) { assert((i >= FTy->getNumParams() || FTy->getParamType(i) == Args[i]->getType()) && "Invoking a function with a bad signature!"); - OL[i+3] = Args[i]; + OL[i] = Args[i]; } } diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp index 5a8ea5c..2a870ec 100644 --- a/lib/VMCore/LLVMContext.cpp +++ b/lib/VMCore/LLVMContext.cpp @@ -26,19 +26,52 @@ LLVMContext& llvm::getGlobalContext() { return *GlobalContext; } -LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { } +LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { + // Create the first metadata kind, which is always 'dbg'. 
+ unsigned DbgID = getMDKindID("dbg"); + assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID; +} LLVMContext::~LLVMContext() { delete pImpl; } -GetElementPtrConstantExpr::GetElementPtrConstantExpr - (Constant *C, - const std::vector<Constant*> &IdxList, - const Type *DestTy) - : ConstantExpr(DestTy, Instruction::GetElementPtr, - OperandTraits<GetElementPtrConstantExpr>::op_end(this) - - (IdxList.size()+1), - IdxList.size()+1) { - OperandList[0] = C; - for (unsigned i = 0, E = IdxList.size(); i != E; ++i) - OperandList[i+1] = IdxList[i]; + +#ifndef NDEBUG +/// isValidName - Return true if Name is a valid custom metadata handler name. +static bool isValidName(StringRef MDName) { + if (MDName.empty()) + return false; + + if (!isalpha(MDName[0])) + return false; + + for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E; + ++I) { + if (!isalnum(*I) && *I != '_' && *I != '-' && *I != '.') + return false; + } + return true; +} +#endif + +/// getMDKindID - Return a unique non-zero ID for the specified metadata kind. +unsigned LLVMContext::getMDKindID(StringRef Name) const { + assert(isValidName(Name) && "Invalid MDNode name"); + + unsigned &Entry = pImpl->CustomMDKindNames[Name]; + + // If this is new, assign it its ID. + if (Entry == 0) Entry = pImpl->CustomMDKindNames.size(); + return Entry; } +/// getHandlerNames - Populate client supplied smallvector using custome +/// metadata name and ID. +void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const { + Names.resize(pImpl->CustomMDKindNames.size()+1); + Names[0] = ""; + for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(), + E = pImpl->CustomMDKindNames.end(); I != E; ++I) + // MD Handlers are numbered from 1. 
+ Names[I->second] = I->first(); +} + + diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp new file mode 100644 index 0000000..b4553dd --- /dev/null +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -0,0 +1,103 @@ +//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the opaque LLVMContextImpl. +// +//===----------------------------------------------------------------------===// + +#include "LLVMContextImpl.h" +#include <algorithm> + +LLVMContextImpl::LLVMContextImpl(LLVMContext &C) + : TheTrueVal(0), TheFalseVal(0), + VoidTy(C, Type::VoidTyID), + LabelTy(C, Type::LabelTyID), + FloatTy(C, Type::FloatTyID), + DoubleTy(C, Type::DoubleTyID), + MetadataTy(C, Type::MetadataTyID), + X86_FP80Ty(C, Type::X86_FP80TyID), + FP128Ty(C, Type::FP128TyID), + PPC_FP128Ty(C, Type::PPC_FP128TyID), + Int1Ty(C, 1), + Int8Ty(C, 8), + Int16Ty(C, 16), + Int32Ty(C, 32), + Int64Ty(C, 64), + AlwaysOpaqueTy(new OpaqueType(C)) { + // Make sure the AlwaysOpaqueTy stays alive as long as the Context. + AlwaysOpaqueTy->addRef(); + OpaqueTypes.insert(AlwaysOpaqueTy); +} + +namespace { +struct DropReferences { + // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second' + // is a Constant*. 
+ template<typename PairT> + void operator()(const PairT &P) { + P.second->dropAllReferences(); + } +}; +} + +LLVMContextImpl::~LLVMContextImpl() { + std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(), + DropReferences()); + std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(), + DropReferences()); + std::for_each(StructConstants.map_begin(), StructConstants.map_end(), + DropReferences()); + std::for_each(UnionConstants.map_begin(), UnionConstants.map_end(), + DropReferences()); + std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(), + DropReferences()); + ExprConstants.freeConstants(); + ArrayConstants.freeConstants(); + StructConstants.freeConstants(); + UnionConstants.freeConstants(); + VectorConstants.freeConstants(); + AggZeroConstants.freeConstants(); + NullPtrConstants.freeConstants(); + UndefValueConstants.freeConstants(); + InlineAsms.freeConstants(); + for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end(); + I != E; ++I) { + delete I->second; + } + for (FPMapTy::iterator I = FPConstants.begin(), E = FPConstants.end(); + I != E; ++I) { + delete I->second; + } + AlwaysOpaqueTy->dropRef(); + for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end(); + I != E; ++I) { + (*I)->AbstractTypeUsers.clear(); + delete *I; + } + // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet + // and the NonUniquedMDNodes sets, so copy the values out first. 
+ SmallVector<MDNode*, 8> MDNodes; + MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size()); + for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end(); + I != E; ++I) { + MDNodes.push_back(&*I); + } + MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end()); + for (SmallVector<MDNode*, 8>::iterator I = MDNodes.begin(), + E = MDNodes.end(); I != E; ++I) { + (*I)->destroy(); + } + assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() && + "Destroying all MDNodes didn't empty the Context's sets."); + // Destroy MDStrings. + for (StringMap<MDString*>::iterator I = MDStringCache.begin(), + E = MDStringCache.end(); I != E; ++I) { + delete I->second; + } +} diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 9978f40..d4ebf80 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -1,4 +1,4 @@ -//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class --------------===// +//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,9 +19,9 @@ #include "LeaksContext.h" #include "TypesContext.h" #include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/Metadata.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/ValueHandle.h" #include "llvm/ADT/APFloat.h" @@ -36,8 +36,6 @@ namespace llvm { class ConstantInt; class ConstantFP; -class MDString; -class MDNode; class LLVMContext; class Type; class Value; @@ -92,6 +90,29 @@ struct DenseMapAPFloatKeyInfo { } }; +/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps +/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp. +class DebugRecVH : public CallbackVH { + /// Ctx - This is the LLVM Context being referenced. + LLVMContextImpl *Ctx; + + /// Idx - The index into either ScopeRecordIdx or ScopeInlinedAtRecords that + /// this reference lives in. 
If this is zero, then it represents a + /// non-canonical entry that has no DenseMap value. This can happen due to + /// RAUW. + int Idx; +public: + DebugRecVH(MDNode *n, LLVMContextImpl *ctx, int idx) + : CallbackVH(n), Ctx(ctx), Idx(idx) {} + + MDNode *get() const { + return cast_or_null<MDNode>(getValPtr()); + } + + virtual void deleted(); + virtual void allUsesReplacedWith(Value *VNew); +}; + class LLVMContextImpl { public: typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*, @@ -130,11 +151,12 @@ public: VectorConstantsTy VectorConstants; ConstantUniqueMap<char, PointerType, ConstantPointerNull> NullPtrConstants; - ConstantUniqueMap<char, Type, UndefValue> UndefValueConstants; DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses; ConstantUniqueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants; + + ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm> InlineAsms; ConstantInt *TheTrueVal; ConstantInt *TheFalseVal; @@ -195,72 +217,29 @@ public: /// context. DenseMap<const Instruction *, MDMapTy> MetadataStore; + /// ScopeRecordIdx - This is the index in ScopeRecords for an MDNode scope + /// entry with no "inlined at" element. + DenseMap<MDNode*, int> ScopeRecordIdx; - LLVMContextImpl(LLVMContext &C) : TheTrueVal(0), TheFalseVal(0), - VoidTy(C, Type::VoidTyID), - LabelTy(C, Type::LabelTyID), - FloatTy(C, Type::FloatTyID), - DoubleTy(C, Type::DoubleTyID), - MetadataTy(C, Type::MetadataTyID), - X86_FP80Ty(C, Type::X86_FP80TyID), - FP128Ty(C, Type::FP128TyID), - PPC_FP128Ty(C, Type::PPC_FP128TyID), - Int1Ty(C, 1), - Int8Ty(C, 8), - Int16Ty(C, 16), - Int32Ty(C, 32), - Int64Ty(C, 64), - AlwaysOpaqueTy(new OpaqueType(C)) { - // Make sure the AlwaysOpaqueTy stays alive as long as the Context. 
- AlwaysOpaqueTy->addRef(); - OpaqueTypes.insert(AlwaysOpaqueTy); - } - - ~LLVMContextImpl() { - ExprConstants.freeConstants(); - ArrayConstants.freeConstants(); - StructConstants.freeConstants(); - VectorConstants.freeConstants(); - AggZeroConstants.freeConstants(); - NullPtrConstants.freeConstants(); - UndefValueConstants.freeConstants(); - for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end(); - I != E; ++I) { - if (I->second->use_empty()) - delete I->second; - } - for (FPMapTy::iterator I = FPConstants.begin(), E = FPConstants.end(); - I != E; ++I) { - if (I->second->use_empty()) - delete I->second; - } - AlwaysOpaqueTy->dropRef(); - for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end(); - I != E; ++I) { - (*I)->AbstractTypeUsers.clear(); - delete *I; - } - // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet - // and the NonUniquedMDNodes sets, so copy the values out first. - SmallVector<MDNode*, 8> MDNodes; - MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size()); - for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end(); - I != E; ++I) { - MDNodes.push_back(&*I); - } - MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end()); - for (SmallVector<MDNode*, 8>::iterator I = MDNodes.begin(), - E = MDNodes.end(); I != E; ++I) { - (*I)->destroy(); - } - assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() && - "Destroying all MDNodes didn't empty the Context's sets."); - // Destroy MDStrings. - for (StringMap<MDString*>::iterator I = MDStringCache.begin(), - E = MDStringCache.end(); I != E; ++I) { - delete I->second; - } - } + /// ScopeRecords - These are the actual mdnodes (in a value handle) for an + /// index. The ValueHandle ensures that ScopeRecordIdx stays up to date if + /// the MDNode is RAUW'd. + std::vector<DebugRecVH> ScopeRecords; + + /// ScopeInlinedAtIdx - This is the index in ScopeInlinedAtRecords for an + /// scope/inlined-at pair. 
+ DenseMap<std::pair<MDNode*, MDNode*>, int> ScopeInlinedAtIdx; + + /// ScopeInlinedAtRecords - These are the actual mdnodes (in value handles) + /// for an index. The ValueHandle ensures that ScopeINlinedAtIdx stays up + /// to date. + std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords; + + int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx); + int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx); + + LLVMContextImpl(LLVMContext &C); + ~LLVMContextImpl(); }; } diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 06d4fd4..73e6091 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -182,19 +182,6 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, unsigned NumVals, FunctionLocalness FL, bool Insert) { LLVMContextImpl *pImpl = Context.pImpl; - FoldingSetNodeID ID; - for (unsigned i = 0; i != NumVals; ++i) - ID.AddPointer(Vals[i]); - - void *InsertPoint; - MDNode *N = NULL; - - if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint))) - return N; - - if (!Insert) - return NULL; - bool isFunctionLocal = false; switch (FL) { case FL_Unknown: @@ -216,6 +203,20 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, break; } + FoldingSetNodeID ID; + for (unsigned i = 0; i != NumVals; ++i) + ID.AddPointer(Vals[i]); + ID.AddBoolean(isFunctionLocal); + + void *InsertPoint; + MDNode *N = NULL; + + if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint))) + return N; + + if (!Insert) + return NULL; + // Coallocate space for the node and Operands together, then placement new. 
void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); @@ -248,6 +249,7 @@ Value *MDNode::getOperand(unsigned i) const { void MDNode::Profile(FoldingSetNodeID &ID) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) ID.AddPointer(getOperand(i)); + ID.AddBoolean(isFunctionLocal()); } void MDNode::setIsNotUniqued() { @@ -410,50 +412,6 @@ StringRef NamedMDNode::getName() const { } //===----------------------------------------------------------------------===// -// LLVMContext MDKind naming implementation. -// - -#ifndef NDEBUG -/// isValidName - Return true if Name is a valid custom metadata handler name. -static bool isValidName(StringRef MDName) { - if (MDName.empty()) - return false; - - if (!isalpha(MDName[0])) - return false; - - for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E; - ++I) { - if (!isalnum(*I) && *I != '_' && *I != '-' && *I != '.') - return false; - } - return true; -} -#endif - -/// getMDKindID - Return a unique non-zero ID for the specified metadata kind. -unsigned LLVMContext::getMDKindID(StringRef Name) const { - assert(isValidName(Name) && "Invalid MDNode name"); - - unsigned &Entry = pImpl->CustomMDKindNames[Name]; - - // If this is new, assign it its ID. - if (Entry == 0) Entry = pImpl->CustomMDKindNames.size(); - return Entry; -} - -/// getHandlerNames - Populate client supplied smallvector using custome -/// metadata name and ID. -void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const { - Names.resize(pImpl->CustomMDKindNames.size()+1); - Names[0] = ""; - for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(), - E = pImpl->CustomMDKindNames.end(); I != E; ++I) - // MD Handlers are numbered from 1. - Names[I->second] = I->first(); -} - -//===----------------------------------------------------------------------===// // Instruction Metadata method implementations. 
// @@ -466,18 +424,29 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const { return getMetadataImpl(getContext().getMDKindID(Kind)); } +void Instruction::setDbgMetadata(MDNode *Node) { + DbgLoc = NewDebugLoc::getFromDILocation(Node); +} + /// setMetadata - Set the metadata of of the specified kind to the specified /// node. This updates/replaces metadata if already present, or removes it if /// Node is null. void Instruction::setMetadata(unsigned KindID, MDNode *Node) { if (Node == 0 && !hasMetadata()) return; + // Handle 'dbg' as a special case since it is not stored in the hash table. + if (KindID == LLVMContext::MD_dbg) { + DbgLoc = NewDebugLoc::getFromDILocation(Node); + return; + } + // Handle the case when we're adding/updating metadata on an instruction. if (Node) { LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; - assert(!Info.empty() == hasMetadata() && "HasMetadata bit is wonked"); + assert(!Info.empty() == hasMetadataHashEntry() && + "HasMetadata bit is wonked"); if (Info.empty()) { - setHasMetadata(true); + setHasMetadataHashEntry(true); } else { // Handle replacement of an existing value. for (unsigned i = 0, e = Info.size(); i != e; ++i) @@ -493,18 +462,19 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { } // Otherwise, we're removing metadata from an instruction. - assert(hasMetadata() && getContext().pImpl->MetadataStore.count(this) && + assert(hasMetadataHashEntry() && + getContext().pImpl->MetadataStore.count(this) && "HasMetadata bit out of date!"); LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; // Common case is removing the only entry. if (Info.size() == 1 && Info[0].first == KindID) { getContext().pImpl->MetadataStore.erase(this); - setHasMetadata(false); + setHasMetadataHashEntry(false); return; } - // Handle replacement of an existing value. + // Handle removal of an existing value. 
for (unsigned i = 0, e = Info.size(); i != e; ++i) if (Info[i].first == KindID) { Info[i] = Info.back(); @@ -516,8 +486,14 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { } MDNode *Instruction::getMetadataImpl(unsigned KindID) const { + // Handle 'dbg' as a special case since it is not stored in the hash table. + if (KindID == LLVMContext::MD_dbg) + return DbgLoc.getAsMDNode(getContext()); + + if (!hasMetadataHashEntry()) return 0; + LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; - assert(hasMetadata() && !Info.empty() && "Shouldn't have called this"); + assert(!Info.empty() && "bit out of sync with hash table"); for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end(); I != E; ++I) @@ -527,14 +503,23 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const { } void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned, - MDNode*> > &Result)const { - assert(hasMetadata() && getContext().pImpl->MetadataStore.count(this) && + MDNode*> > &Result) const { + Result.clear(); + + // Handle 'dbg' as a special case since it is not stored in the hash table. + if (!DbgLoc.isUnknown()) { + Result.push_back(std::make_pair((unsigned)LLVMContext::MD_dbg, + DbgLoc.getAsMDNode(getContext()))); + if (!hasMetadataHashEntry()) return; + } + + assert(hasMetadataHashEntry() && + getContext().pImpl->MetadataStore.count(this) && "Shouldn't have called this"); const LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore.find(this)->second; assert(!Info.empty() && "Shouldn't have called this"); - Result.clear(); Result.append(Info.begin(), Info.end()); // Sort the resulting array so it is stable. 
@@ -542,10 +527,32 @@ void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned, array_pod_sort(Result.begin(), Result.end()); } +void Instruction:: +getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned, + MDNode*> > &Result) const { + Result.clear(); + assert(hasMetadataHashEntry() && + getContext().pImpl->MetadataStore.count(this) && + "Shouldn't have called this"); + const LLVMContextImpl::MDMapTy &Info = + getContext().pImpl->MetadataStore.find(this)->second; + assert(!Info.empty() && "Shouldn't have called this"); + + Result.append(Info.begin(), Info.end()); + + // Sort the resulting array so it is stable. + if (Result.size() > 1) + array_pod_sort(Result.begin(), Result.end()); +} + + /// removeAllMetadata - Remove all metadata from this instruction. void Instruction::removeAllMetadata() { assert(hasMetadata() && "Caller should check"); - getContext().pImpl->MetadataStore.erase(this); - setHasMetadata(false); + DbgLoc = NewDebugLoc(); + if (hasMetadataHashEntry()) { + getContext().pImpl->MetadataStore.erase(this); + setHasMetadataHashEntry(false); + } } diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 001bb00..94840f0 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -82,7 +82,7 @@ Module::Endianness Module::getEndianness() const { while (!temp.empty()) { StringRef token = DataLayout; - tie(token, temp) = getToken(DataLayout, "-"); + tie(token, temp) = getToken(temp, "-"); if (token[0] == 'e') { ret = LittleEndian; diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index c4dfe14..6774cec 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -378,17 +378,19 @@ namespace { static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex; class TimingInfo { - std::map<Pass*, Timer> TimingData; + DenseMap<Pass*, Timer*> TimingData; TimerGroup TG; - public: // Use 'create' member to get this. TimingInfo() : TG("... 
Pass execution timing report ...") {} // TimingDtor - Print out information about timing information ~TimingInfo() { - // Delete all of the timers... - TimingData.clear(); + // Delete all of the timers, which accumulate their info into the + // TimerGroup. + for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(), + E = TimingData.end(); I != E; ++I) + delete I->second; // TimerGroup is deleted next, printing the report. } @@ -397,18 +399,15 @@ public: // null. It may be called multiple times. static void createTheTimeInfo(); - /// passStarted - This method creates a timer for the given pass if it doesn't - /// already have one, and starts the timer. - Timer *passStarted(Pass *P) { + /// getPassTimer - Return the timer for the specified pass if it exists. + Timer *getPassTimer(Pass *P) { if (P->getAsPMDataManager()) return 0; sys::SmartScopedLock<true> Lock(*TimingInfoMutex); - std::map<Pass*, Timer>::iterator I = TimingData.find(P); - if (I == TimingData.end()) - I=TimingData.insert(std::make_pair(P, Timer(P->getPassName(), TG))).first; - Timer *T = &I->second; - T->startTimer(); + Timer *&T = TimingData[P]; + if (T == 0) + T = new Timer(P->getPassName(), TG); return T; } }; @@ -704,11 +703,8 @@ void PMDataManager::verifyPreservedAnalysis(Pass *P) { E = PreservedSet.end(); I != E; ++I) { AnalysisID AID = *I; if (Pass *AP = findAnalysisPass(AID, true)) { - - Timer *T = 0; - if (TheTimeInfo) T = TheTimeInfo->passStarted(AP); + TimeRegion PassTimer(getPassTimer(AP)); AP->verifyAnalysis(); - if (T) T->stopTimer(); } } } @@ -792,10 +788,9 @@ void PMDataManager::freePass(Pass *P, StringRef Msg, { // If the pass crashes releasing memory, remember this. PassManagerPrettyStackEntry X(P); - - Timer *T = StartPassTimer(P); + TimeRegion PassTimer(getPassTimer(P)); + P->releaseMemory(); - StopPassTimer(P, T); } if (const PassInfo *PI = P->getPassInfo()) { @@ -1128,10 +1123,9 @@ bool BBPassManager::runOnFunction(Function &F) { { // If the pass crashes, remember this. 
PassManagerPrettyStackEntry X(BP, *I); - - Timer *T = StartPassTimer(BP); + TimeRegion PassTimer(getPassTimer(BP)); + LocalChanged |= BP->runOnBasicBlock(*I); - StopPassTimer(BP, T); } Changed |= LocalChanged; @@ -1345,10 +1339,9 @@ bool FPPassManager::runOnFunction(Function &F) { { PassManagerPrettyStackEntry X(FP, F); + TimeRegion PassTimer(getPassTimer(FP)); - Timer *T = StartPassTimer(FP); LocalChanged |= FP->runOnFunction(F); - StopPassTimer(FP, T); } Changed |= LocalChanged; @@ -1420,9 +1413,9 @@ MPPassManager::runOnModule(Module &M) { { PassManagerPrettyStackEntry X(MP, M); - Timer *T = StartPassTimer(MP); + TimeRegion PassTimer(getPassTimer(MP)); + LocalChanged |= MP->runOnModule(M); - StopPassTimer(MP, T); } Changed |= LocalChanged; @@ -1559,17 +1552,12 @@ void TimingInfo::createTheTimeInfo() { } /// If TimingInfo is enabled then start pass timer. -Timer *llvm::StartPassTimer(Pass *P) { +Timer *llvm::getPassTimer(Pass *P) { if (TheTimeInfo) - return TheTimeInfo->passStarted(P); + return TheTimeInfo->getPassTimer(P); return 0; } -/// If TimingInfo is enabled then stop pass timer. -void llvm::StopPassTimer(Pass *P, Timer *T) { - if (T) T->stopTimer(); -} - //===----------------------------------------------------------------------===// // PMStack implementation // diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 2a0cfa8..5f9c11f 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -57,6 +57,11 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) { /// need for a std::vector to be used in the Type class itself. /// @brief Type destruction function void Type::destroy() const { + // Nothing calls getForwardedType from here on. + if (ForwardType && ForwardType->isAbstract()) { + ForwardType->dropRef(); + ForwardType = NULL; + } // Structures and Functions allocate their contained types past the end of // the type object itself. 
These need to be destroyed differently than the @@ -87,11 +92,6 @@ void Type::destroy() const { pImpl->OpaqueTypes.erase(opaque_this); } - if (ForwardType && ForwardType->isAbstract()) { - ForwardType->dropRef(); - ForwardType = NULL; - } - // For all the other type subclasses, there is either no contained types or // just one (all Sequentials). For Sequentials, the PATypeHandle is not // allocated past the type object, its included directly in the SequentialType diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index a36d262..645dd5a 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -86,7 +86,7 @@ Value::~Value() { /// hasNUses - Return true if this Value has exactly N users. /// bool Value::hasNUses(unsigned N) const { - use_const_iterator UI = use_begin(), E = use_end(); + const_use_iterator UI = use_begin(), E = use_end(); for (; N; --N, ++UI) if (UI == E) return false; // Too few. @@ -97,7 +97,7 @@ bool Value::hasNUses(unsigned N) const { /// logically equivalent to getNumUses() >= N. /// bool Value::hasNUsesOrMore(unsigned N) const { - use_const_iterator UI = use_begin(), E = use_end(); + const_use_iterator UI = use_begin(), E = use_end(); for (; N; --N, ++UI) if (UI == E) return false; // Too few. @@ -108,7 +108,7 @@ bool Value::hasNUsesOrMore(unsigned N) const { /// isUsedInBasicBlock - Return true if this value is used in the specified /// basic block. 
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const { - for (use_const_iterator I = use_begin(), E = use_end(); I != E; ++I) { + for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { const Instruction *User = dyn_cast<Instruction>(*I); if (User && User->getParent() == BB) return true; diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp index d30a9d6..449d61a 100644 --- a/lib/VMCore/ValueSymbolTable.cpp +++ b/lib/VMCore/ValueSymbolTable.cpp @@ -55,9 +55,7 @@ void ValueSymbolTable::reinsertValue(Value* V) { raw_svector_ostream(UniqueName) << ++LastUnique; // Try insert the vmap entry with this suffix. - ValueName &NewName = - vmap.GetOrCreateValue(StringRef(UniqueName.data(), - UniqueName.size())); + ValueName &NewName = vmap.GetOrCreateValue(UniqueName); if (NewName.getValue() == 0) { // Newly inserted name. Success! NewName.setValue(V); @@ -88,7 +86,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { } // Otherwise, there is a naming conflict. Rename this value. - SmallString<128> UniqueName(Name.begin(), Name.end()); + SmallString<256> UniqueName(Name.begin(), Name.end()); while (1) { // Trim any suffix off and append the next number. @@ -96,9 +94,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { raw_svector_ostream(UniqueName) << ++LastUnique; // Try insert the vmap entry with this suffix. - ValueName &NewName = - vmap.GetOrCreateValue(StringRef(UniqueName.data(), - UniqueName.size())); + ValueName &NewName = vmap.GetOrCreateValue(UniqueName); if (NewName.getValue() == 0) { // Newly inserted name. Success! 
NewName.setValue(V); diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index f141382..c18168d 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -676,17 +676,13 @@ void Verifier::visitFunction(Function &F) { "blockaddress may not be used with the entry block!", Entry); } } - + // If this function is actually an intrinsic, verify that it is only used in // direct call/invokes, never having its "address taken". if (F.getIntrinsicID()) { - for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E;++UI){ - User *U = cast<User>(UI); - if ((isa<CallInst>(U) || isa<InvokeInst>(U)) && UI.getOperandNo() == 0) - continue; // Direct calls/invokes are ok. - + const User *U; + if (F.hasAddressTaken(&U)) Assert1(0, "Invalid user of intrinsic instruction!", U); - } } } @@ -1483,7 +1479,7 @@ void Verifier::visitInstruction(Instruction &I) { "Instruction does not dominate all uses!", Op, &I); } } else if (isa<InlineAsm>(I.getOperand(i))) { - Assert1(i == 0 && (isa<CallInst>(I) || isa<InvokeInst>(I)), + Assert1((i == 0 && isa<CallInst>(I)) || (i + 3 == e && isa<InvokeInst>(I)), "Cannot take the address of an inline asm!", &I); } } @@ -1683,13 +1679,11 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { /// parameters beginning with NumRets. 
/// static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) { - if (ArgNo < NumRets) { - if (NumRets == 1) - return "Intrinsic result type"; - else - return "Intrinsic result type #" + utostr(ArgNo); - } else + if (ArgNo >= NumRets) return "Intrinsic parameter #" + utostr(ArgNo - NumRets); + if (NumRets == 1) + return "Intrinsic result type"; + return "Intrinsic result type #" + utostr(ArgNo); } bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, @@ -1706,9 +1700,13 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, const Type *RetTy = FTy->getReturnType(); const StructType *ST = dyn_cast<StructType>(RetTy); - unsigned NumRets = 1; - if (ST) - NumRets = ST->getNumElements(); + unsigned NumRetVals; + if (RetTy->isVoidTy()) + NumRetVals = 0; + else if (ST) + NumRetVals = ST->getNumElements(); + else + NumRetVals = 1; if (VT < 0) { int Match = ~VT; @@ -1720,7 +1718,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, TruncatedElementVectorType)) != 0) { const IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy); if (!VTy || !IEltTy) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not " "an integral vector type.", F); return false; } @@ -1728,7 +1726,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, // the type being matched against. 
if ((Match & ExtendedElementVectorType) != 0) { if ((IEltTy->getBitWidth() & 1) != 0) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " vector " + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " vector " "element bit-width is odd.", F); return false; } @@ -1738,25 +1736,25 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, Match &= ~(ExtendedElementVectorType | TruncatedElementVectorType); } - if (Match <= static_cast<int>(NumRets - 1)) { + if (Match <= static_cast<int>(NumRetVals - 1)) { if (ST) RetTy = ST->getElementType(Match); if (Ty != RetTy) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " does not " + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not " "match return type.", F); return false; } } else { - if (Ty != FTy->getParamType(Match - NumRets)) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " does not " - "match parameter %" + utostr(Match - NumRets) + ".", F); + if (Ty != FTy->getParamType(Match - NumRetVals)) { + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not " + "match parameter %" + utostr(Match - NumRetVals) + ".", F); return false; } } } else if (VT == MVT::iAny) { if (!EltTy->isIntegerTy()) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not " "an integer type.", F); return false; } @@ -1781,7 +1779,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, } } else if (VT == MVT::fAny) { if (!EltTy->isFloatingPointTy()) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not " "a floating-point type.", F); return false; } @@ -1794,13 +1792,14 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, Suffix += EVT::getEVT(EltTy).getEVTString(); } else if (VT == MVT::vAny) { if (!VTy) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a vector type.", F); + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " 
is not a vector type.", + F); return false; } Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString(); } else if (VT == MVT::iPTR) { if (!Ty->isPointerTy()) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a " + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a " "pointer and a pointer is required.", F); return false; } @@ -1812,7 +1811,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, Suffix += ".p" + utostr(PTyp->getAddressSpace()) + EVT::getEVT(PTyp->getElementType()).getEVTString(); } else { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a " + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a " "pointer and a pointer is required.", F); return false; } @@ -1832,10 +1831,10 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, } } else if (EVT((MVT::SimpleValueType)VT).getTypeForEVT(Ty->getContext()) != EltTy) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is wrong!", F); + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is wrong!", F); return false; } else if (EltTy != Ty) { - CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is a vector " + CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is a vector " "and a scalar is required.", F); return false; } @@ -1847,10 +1846,10 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, /// Intrinsics.gen. This implements a little state machine that verifies the /// prototype of intrinsics. void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, - unsigned RetNum, - unsigned ParamNum, ...) { + unsigned NumRetVals, + unsigned NumParams, ...) { va_list VA; - va_start(VA, ParamNum); + va_start(VA, NumParams); const FunctionType *FTy = F->getFunctionType(); // For overloaded intrinsics, the Suffix of the function name must match the @@ -1858,7 +1857,7 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, // suffix, to be checked at the end. 
std::string Suffix; - if (FTy->getNumParams() + FTy->isVarArg() != ParamNum) { + if (FTy->getNumParams() + FTy->isVarArg() != NumParams) { CheckFailed("Intrinsic prototype has incorrect number of arguments!", F); return; } @@ -1866,23 +1865,27 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, const Type *Ty = FTy->getReturnType(); const StructType *ST = dyn_cast<StructType>(Ty); + if (NumRetVals == 0 && !Ty->isVoidTy()) { + CheckFailed("Intrinsic should return void", F); + return; + } + // Verify the return types. - if (ST && ST->getNumElements() != RetNum) { + if (ST && ST->getNumElements() != NumRetVals) { CheckFailed("Intrinsic prototype has incorrect number of return types!", F); return; } - - for (unsigned ArgNo = 0; ArgNo < RetNum; ++ArgNo) { + + for (unsigned ArgNo = 0; ArgNo != NumRetVals; ++ArgNo) { int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative. if (ST) Ty = ST->getElementType(ArgNo); - if (!PerformTypeCheck(ID, F, Ty, VT, ArgNo, Suffix)) break; } // Verify the parameter types. - for (unsigned ArgNo = 0; ArgNo < ParamNum; ++ArgNo) { + for (unsigned ArgNo = 0; ArgNo != NumParams; ++ArgNo) { int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative. if (VT == MVT::isVoid && ArgNo > 0) { @@ -1891,8 +1894,8 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, break; } - if (!PerformTypeCheck(ID, F, FTy->getParamType(ArgNo), VT, ArgNo + RetNum, - Suffix)) + if (!PerformTypeCheck(ID, F, FTy->getParamType(ArgNo), VT, + ArgNo + NumRetVals, Suffix)) break; } |