Diffstat (limited to 'lib')
302 files changed, 10725 insertions, 5828 deletions
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index ad05dd9..5a37ce0 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMAnalysis LoopPass.cpp MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp + ModuleDebugInfoPrinter.cpp PHITransAddr.cpp PointerTracking.cpp PostDominators.cpp diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 141b181..a7b6d2b 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -32,42 +32,6 @@ using namespace llvm::dwarf; // DIDescriptor //===----------------------------------------------------------------------===// -/// ValidDebugInfo - Return true if V represents valid debug info value. -/// FIXME : Add DIDescriptor.isValid() -bool DIDescriptor::ValidDebugInfo(MDNode *N, unsigned OptLevel) { - if (!N) - return false; - - DIDescriptor DI(N); - - // Check current version. Allow Version7 for now. - unsigned Version = DI.getVersion(); - if (Version != LLVMDebugVersion && Version != LLVMDebugVersion7) - return false; - - switch (DI.getTag()) { - case DW_TAG_variable: - assert(DIVariable(N).Verify() && "Invalid DebugInfo value"); - break; - case DW_TAG_compile_unit: - assert(DICompileUnit(N).Verify() && "Invalid DebugInfo value"); - break; - case DW_TAG_subprogram: - assert(DISubprogram(N).Verify() && "Invalid DebugInfo value"); - break; - case DW_TAG_lexical_block: - // FIXME: This interfers with the quality of generated code during - // optimization. - if (OptLevel != CodeGenOpt::None) - return false; - // FALLTHROUGH - default: - break; - } - - return true; -} - StringRef DIDescriptor::getStringField(unsigned Elt) const { if (DbgNode == 0) @@ -96,7 +60,7 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { return DIDescriptor(); if (Elt < DbgNode->getNumOperands()) - return DIDescriptor(dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt))); + return DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt))); return DIDescriptor(); } @@ -246,7 +210,7 @@ bool DIDescriptor::isEnumerator() const { // Simple Descriptor Constructors and other Methods //===----------------------------------------------------------------------===// -DIType::DIType(MDNode *N) : DIScope(N) { +DIType::DIType(const MDNode *N) : DIScope(N) { if (!N) return; if (!isBasicType() && !isDerivedType() && !isCompositeType()) { DbgNode = 0; @@ -271,9 +235,11 @@ void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) { // which, due to uniquing, has merged with the source. We shield clients from // this detail by allowing a value to be replaced with replaceAllUsesWith() // itself. - if (getNode() != D.getNode()) { - MDNode *Node = DbgNode; - Node->replaceAllUsesWith(D.getNode()); + if (DbgNode != D) { + MDNode *Node = const_cast<MDNode*>(DbgNode); + const MDNode *DN = D; + const Value *V = cast_or_null<Value>(DN); + Node->replaceAllUsesWith(const_cast<Value*>(V)); Node->destroy(); } } @@ -366,6 +332,9 @@ bool DIVariable::Verify() const { if (!getContext().Verify()) return false; + if (!getCompileUnit().Verify()) + return false; + DIType Ty = getType(); if (!Ty.Verify()) return false; @@ -381,6 +350,17 @@ bool DILocation::Verify() const { return DbgNode->getNumOperands() == 4; } +/// Verify - Verify that a namespace descriptor is well formed. 
+bool DINameSpace::Verify() const { + if (!DbgNode) + return false; + if (getName().empty()) + return false; + if (!getCompileUnit().Verify()) + return false; + return true; +} + /// getOriginalTypeSize - If this type is derived from a base type then /// return base type size. uint64_t DIDerivedType::getOriginalTypeSize() const { @@ -394,7 +374,7 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { if (!BaseType.isValid()) return getSizeInBits(); if (BaseType.isDerivedType()) - return DIDerivedType(BaseType.getNode()).getOriginalTypeSize(); + return DIDerivedType(BaseType).getOriginalTypeSize(); else return BaseType.getSizeInBits(); } @@ -410,7 +390,7 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) { return false; // This variable is not inlined function argument if its scope // does not describe current function. - return !(DISubprogram(getContext().getNode()).describes(CurFn)); + return !(DISubprogram(getContext()).describes(CurFn)); } /// describes - Return true if this subprogram provides debugging @@ -475,144 +455,182 @@ StringRef DIScope::getDirectory() const { //===----------------------------------------------------------------------===// -/// dump - Print descriptor. -void DIDescriptor::dump() const { - dbgs() << "[" << dwarf::TagString(getTag()) << "] "; - dbgs().write_hex((intptr_t) &*DbgNode) << ']'; +/// print - Print descriptor. +void DIDescriptor::print(raw_ostream &OS) const { + OS << "[" << dwarf::TagString(getTag()) << "] "; + OS.write_hex((intptr_t) &*DbgNode) << ']'; } -/// dump - Print compile unit. -void DICompileUnit::dump() const { +/// print - Print compile unit. +void DICompileUnit::print(raw_ostream &OS) const { if (getLanguage()) - dbgs() << " [" << dwarf::LanguageString(getLanguage()) << "] "; + OS << " [" << dwarf::LanguageString(getLanguage()) << "] "; - dbgs() << " [" << getDirectory() << "/" << getFilename() << " ]"; + OS << " [" << getDirectory() << "/" << getFilename() << "]"; } -/// dump - Print type. -void DIType::dump() const { +/// print - Print type. +void DIType::print(raw_ostream &OS) const { if (!DbgNode) return; StringRef Res = getName(); if (!Res.empty()) - dbgs() << " [" << Res << "] "; + OS << " [" << Res << "] "; unsigned Tag = getTag(); - dbgs() << " [" << dwarf::TagString(Tag) << "] "; + OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().dump(); - dbgs() << " [" - << getLineNumber() << ", " - << getSizeInBits() << ", " - << getAlignInBits() << ", " - << getOffsetInBits() + getCompileUnit().print(OS); + OS << " [" + << "line " << getLineNumber() << ", " + << getSizeInBits() << " bits, " + << getAlignInBits() << " bit alignment, " + << getOffsetInBits() << " bit offset" << "] "; if (isPrivate()) - dbgs() << " [private] "; + OS << " [private] "; else if (isProtected()) - dbgs() << " [protected] "; + OS << " [protected] "; if (isForwardDecl()) - dbgs() << " [fwd] "; + OS << " [fwd] "; if (isBasicType()) - DIBasicType(DbgNode).dump(); + DIBasicType(DbgNode).print(OS); else if (isDerivedType()) - DIDerivedType(DbgNode).dump(); + DIDerivedType(DbgNode).print(OS); else if (isCompositeType()) - DICompositeType(DbgNode).dump(); + DICompositeType(DbgNode).print(OS); else { - dbgs() << "Invalid DIType\n"; + OS << "Invalid DIType\n"; return; } - dbgs() << "\n"; + OS << "\n"; } -/// dump - Print basic type. -void DIBasicType::dump() const { - dbgs() << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; +/// print - Print basic type. 
+void DIBasicType::print(raw_ostream &OS) const { + OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; } -/// dump - Print derived type. -void DIDerivedType::dump() const { - dbgs() << "\n\t Derived From: "; getTypeDerivedFrom().dump(); +/// print - Print derived type. +void DIDerivedType::print(raw_ostream &OS) const { + OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS); } -/// dump - Print composite type. -void DICompositeType::dump() const { +/// print - Print composite type. +void DICompositeType::print(raw_ostream &OS) const { DIArray A = getTypeArray(); - dbgs() << " [" << A.getNumElements() << " elements]"; + OS << " [" << A.getNumElements() << " elements]"; } -/// dump - Print global. -void DIGlobal::dump() const { +/// print - Print subprogram. +void DISubprogram::print(raw_ostream &OS) const { StringRef Res = getName(); if (!Res.empty()) - dbgs() << " [" << Res << "] "; + OS << " [" << Res << "] "; unsigned Tag = getTag(); - dbgs() << " [" << dwarf::TagString(Tag) << "] "; + OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().dump(); - dbgs() << " [" << getLineNumber() << "] "; + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; if (isLocalToUnit()) - dbgs() << " [local] "; + OS << " [local] "; if (isDefinition()) - dbgs() << " [def] "; + OS << " [def] "; - if (isGlobalVariable()) - DIGlobalVariable(DbgNode).dump(); - - dbgs() << "\n"; + OS << "\n"; } -/// dump - Print subprogram. -void DISubprogram::dump() const { +/// print - Print global variable. +void DIGlobalVariable::print(raw_ostream &OS) const { + OS << " ["; StringRef Res = getName(); if (!Res.empty()) - dbgs() << " [" << Res << "] "; + OS << " [" << Res << "] "; unsigned Tag = getTag(); - dbgs() << " [" << dwarf::TagString(Tag) << "] "; + OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().dump(); - dbgs() << " [" << getLineNumber() << "] "; + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; if (isLocalToUnit()) - dbgs() << " [local] "; + OS << " [local] "; if (isDefinition()) - dbgs() << " [def] "; + OS << " [def] "; + + if (isGlobalVariable()) + DIGlobalVariable(DbgNode).print(OS); + OS << "]\n"; +} + +/// print - Print variable. +void DIVariable::print(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; + getType().print(OS); + OS << "\n"; + + // FIXME: Dump complex addresses +} + +/// dump - Print descriptor to dbgs() with a newline. +void DIDescriptor::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print compile unit to dbgs() with a newline. +void DICompileUnit::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print type to dbgs() with a newline. +void DIType::dump() const { + print(dbgs()); dbgs() << '\n'; +} - dbgs() << "\n"; +/// dump - Print basic type to dbgs() with a newline. +void DIBasicType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print derived type to dbgs() with a newline. +void DIDerivedType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print composite type to dbgs() with a newline. +void DICompositeType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print subprogram to dbgs() with a newline. +void DISubprogram::dump() const { + print(dbgs()); dbgs() << '\n'; } /// dump - Print global variable. 
void DIGlobalVariable::dump() const { - dbgs() << " ["; - getGlobal()->dump(); - dbgs() << "] "; + print(dbgs()); dbgs() << '\n'; } /// dump - Print variable. void DIVariable::dump() const { - StringRef Res = getName(); - if (!Res.empty()) - dbgs() << " [" << Res << "] "; - - getCompileUnit().dump(); - dbgs() << " [" << getLineNumber() << "] "; - getType().dump(); - dbgs() << "\n"; - - // FIXME: Dump complex addresses + print(dbgs()); dbgs() << '\n'; } //===----------------------------------------------------------------------===// @@ -641,7 +659,7 @@ DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) { Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); else for (unsigned i = 0; i != NumTys; ++i) - Elts.push_back(Tys[i].getNode()); + Elts.push_back(Tys[i]); return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size())); } @@ -694,7 +712,7 @@ DIFile DIFactory::CreateFile(StringRef Filename, GetTagConstant(dwarf::DW_TAG_file_type), MDString::get(VMContext, Filename), MDString::get(VMContext, Directory), - CU.getNode() + CU }; return DIFile(MDNode::get(VMContext, &Elts[0], 4)); @@ -722,9 +740,9 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, unsigned Encoding) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_base_type), - Context.getNode(), + Context, MDString::get(VMContext, Name), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -747,9 +765,9 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context, unsigned Encoding) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_base_type), - Context.getNode(), + Context, MDString::get(VMContext, Name), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), SizeInBits, AlignInBits, @@ -766,7 +784,7 @@ DIType DIFactory::CreateArtificialType(DIType Ty) { return Ty; SmallVector<Value *, 9> Elts; - MDNode *N = Ty.getNode(); + MDNode *N = Ty; assert (N && "Unexpected input DIType!"); for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (Value *V = N->getOperand(i)) @@ -798,15 +816,15 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, DIType DerivedFrom) { Value *Elts[] = { GetTagConstant(Tag), - Context.getNode(), + Context, MDString::get(VMContext, Name), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom.getNode(), + DerivedFrom, }; return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10)); } @@ -826,15 +844,15 @@ DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag, DIType DerivedFrom) { Value *Elts[] = { GetTagConstant(Tag), - Context.getNode(), + Context, MDString::get(VMContext, Name), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), SizeInBits, AlignInBits, OffsetInBits, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom.getNode(), + DerivedFrom, }; return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10)); } @@ -857,16 +875,16 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag, Value *Elts[] = { GetTagConstant(Tag), - Context.getNode(), + Context, MDString::get(VMContext, Name), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), 
LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom.getNode(), - Elements.getNode(), + DerivedFrom, + Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), ContainingType }; @@ -890,16 +908,16 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, Value *Elts[] = { GetTagConstant(Tag), - Context.getNode(), + Context, MDString::get(VMContext, Name), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), SizeInBits, AlignInBits, OffsetInBits, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom.getNode(), - Elements.getNode(), + DerivedFrom, + Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) }; return DICompositeType(MDNode::get(VMContext, &Elts[0], 12)); @@ -925,18 +943,18 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context.getNode(), + Context, MDString::get(VMContext, Name), MDString::get(VMContext, DisplayName), MDString::get(VMContext, LinkageName), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty.getNode(), + Ty, ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), - ContainingType.getNode(), + ContainingType, ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized) }; @@ -947,9 +965,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, /// given declaration. DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) { if (SPDeclaration.isDefinition()) - return DISubprogram(SPDeclaration.getNode()); + return DISubprogram(SPDeclaration); - MDNode *DeclNode = SPDeclaration.getNode(); + MDNode *DeclNode = SPDeclaration; Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -982,13 +1000,13 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_variable), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context.getNode(), + Context, MDString::get(VMContext, Name), MDString::get(VMContext, DisplayName), MDString::get(VMContext, LinkageName), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty.getNode(), + Ty, ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), Val @@ -1010,16 +1028,24 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, StringRef Name, DIFile F, unsigned LineNo, - DIType Ty) { + DIType Ty, bool AlwaysPreserve) { Value *Elts[] = { GetTagConstant(Tag), - Context.getNode(), + Context, MDString::get(VMContext, Name), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty.getNode(), + Ty, }; - return DIVariable(MDNode::get(VMContext, &Elts[0], 6)); + MDNode *Node = MDNode::get(VMContext, &Elts[0], 6); + if (AlwaysPreserve) { + // The optimizer may remove local variable. 
If there is an interest + // to preserve variable info in such situation then stash it in a + // named mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv"); + NMD->addOperand(Node); + } + return DIVariable(Node); } @@ -1033,11 +1059,11 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context, SmallVector<Value *, 9> &addr) { SmallVector<Value *, 9> Elts; Elts.push_back(GetTagConstant(Tag)); - Elts.push_back(Context.getNode()); + Elts.push_back(Context); Elts.push_back(MDString::get(VMContext, Name)); - Elts.push_back(F.getNode()); + Elts.push_back(F); Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)); - Elts.push_back(Ty.getNode()); + Elts.push_back(Ty); Elts.insert(Elts.end(), addr.begin(), addr.end()); return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size())); @@ -1050,7 +1076,7 @@ DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context, unsigned LineNo, unsigned Col) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_lexical_block), - Context.getNode(), + Context, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), Col) }; @@ -1064,9 +1090,9 @@ DINameSpace DIFactory::CreateNameSpace(DIDescriptor Context, StringRef Name, unsigned LineNo) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_namespace), - Context.getNode(), + Context, MDString::get(VMContext, Name), - F.getNode(), + F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; return DINameSpace(MDNode::get(VMContext, &Elts[0], 5)); @@ -1078,8 +1104,8 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, Value *Elts[] = { ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo), - S.getNode(), - OrigLoc.getNode(), + S, + OrigLoc, }; return DILocation(MDNode::get(VMContext, &Elts[0], 4)); } @@ -1090,7 +1116,7 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, Value *Elts[] = { ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo), - S.getNode(), + S, OrigLoc }; return DILocation(MDNode::get(VMContext, &Elts[0], 4)); @@ -1104,12 +1130,12 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, Instruction *InsertBefore) { assert(Storage && "no storage passed to dbg.declare"); - assert(D.getNode() && "empty DIVariable passed to dbg.declare"); + assert(D.Verify() && "empty DIVariable passed to dbg.declare"); if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), - D.getNode() }; + D }; return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); } @@ -1117,12 +1143,12 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, BasicBlock *InsertAtEnd) { assert(Storage && "no storage passed to dbg.declare"); - assert(D.getNode() && "empty DIVariable passed to dbg.declare"); + assert(D.Verify() && "invalid DIVariable passed to dbg.declare"); if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), - D.getNode() }; + D }; // If this block already has a terminator then insert this intrinsic // before the terminator. 
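The DebugInfo.cpp hunks above remove the explicit getNode() accessor in favor of an implicit DIDescriptor-to-MDNode* conversion, and DIFactory::CreateVariable grows an AlwaysPreserve flag that also records the variable in the llvm.dbg.lv named metadata so that locals removed by the optimizer keep their debug info. A minimal C++ sketch of client code under the new interface follows; the example function and its arguments are illustrative, not part of the patch.

#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Instructions.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;

// Sketch only: assumes the post-patch DebugInfo.h interface shown in
// this diff; "example" and its parameters are hypothetical.
void example(DIFactory &DIF, DIDescriptor Context, DIFile F, DIType Ty,
             Value *Storage, Instruction *InsertPt) {
  // AlwaysPreserve = true additionally stashes the variable in the
  // "llvm.dbg.lv" named metadata node, per the new CreateVariable.
  DIVariable Var = DIF.CreateVariable(dwarf::DW_TAG_auto_variable, Context,
                                      "x", F, /*LineNo=*/42, Ty,
                                      /*AlwaysPreserve=*/true);
  // Descriptors now convert implicitly where an MDNode* is expected,
  // so no getNode() call is needed.
  MDNode *N = Var;
  (void)N;
  DIF.InsertDeclare(Storage, Var, InsertPt); // now asserts Var.Verify()
}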
@@ -1136,13 +1162,13 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, DIVariable D, Instruction *InsertBefore) { assert(V && "no value passed to dbg.value"); - assert(D.getNode() && "empty DIVariable passed to dbg.value"); + assert(D.Verify() && "invalid DIVariable passed to dbg.value"); if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); Value *Args[] = { MDNode::get(V->getContext(), &V, 1), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), - D.getNode() }; + D }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); } @@ -1151,13 +1177,13 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, DIVariable D, BasicBlock *InsertAtEnd) { assert(V && "no value passed to dbg.value"); - assert(D.getNode() && "empty DIVariable passed to dbg.value"); + assert(D.Verify() && "invalid DIVariable passed to dbg.value"); if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); Value *Args[] = { MDNode::get(V->getContext(), &V, 1), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), - D.getNode() }; + D }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); } @@ -1182,11 +1208,11 @@ void DebugInfoFinder::processModule(Module &M) { DIDescriptor Scope(Loc.getScope(Ctx)); if (Scope.isCompileUnit()) - addCompileUnit(DICompileUnit(Scope.getNode())); + addCompileUnit(DICompileUnit(Scope)); else if (Scope.isSubprogram()) - processSubprogram(DISubprogram(Scope.getNode())); + processSubprogram(DISubprogram(Scope)); else if (Scope.isLexicalBlock()) - processLexicalBlock(DILexicalBlock(Scope.getNode())); + processLexicalBlock(DILexicalBlock(Scope)); if (MDNode *IA = Loc.getInlinedAt(Ctx)) processLocation(DILocation(IA)); @@ -1208,13 +1234,13 @@ void DebugInfoFinder::processModule(Module &M) { /// processLocation - Process DILocation. 
void DebugInfoFinder::processLocation(DILocation Loc) { if (!Loc.Verify()) return; - DIDescriptor S(Loc.getScope().getNode()); + DIDescriptor S(Loc.getScope()); if (S.isCompileUnit()) - addCompileUnit(DICompileUnit(S.getNode())); + addCompileUnit(DICompileUnit(S)); else if (S.isSubprogram()) - processSubprogram(DISubprogram(S.getNode())); + processSubprogram(DISubprogram(S)); else if (S.isLexicalBlock()) - processLexicalBlock(DILexicalBlock(S.getNode())); + processLexicalBlock(DILexicalBlock(S)); processLocation(Loc.getOrigLocation()); } @@ -1225,18 +1251,18 @@ void DebugInfoFinder::processType(DIType DT) { addCompileUnit(DT.getCompileUnit()); if (DT.isCompositeType()) { - DICompositeType DCT(DT.getNode()); + DICompositeType DCT(DT); processType(DCT.getTypeDerivedFrom()); DIArray DA = DCT.getTypeArray(); for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) { DIDescriptor D = DA.getElement(i); if (D.isType()) - processType(DIType(D.getNode())); + processType(DIType(D)); else if (D.isSubprogram()) - processSubprogram(DISubprogram(D.getNode())); + processSubprogram(DISubprogram(D)); } } else if (DT.isDerivedType()) { - DIDerivedType DDT(DT.getNode()); + DIDerivedType DDT(DT); processType(DDT.getTypeDerivedFrom()); } } @@ -1245,9 +1271,9 @@ void DebugInfoFinder::processType(DIType DT) { void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { DIScope Context = LB.getContext(); if (Context.isLexicalBlock()) - return processLexicalBlock(DILexicalBlock(Context.getNode())); + return processLexicalBlock(DILexicalBlock(Context)); else - return processSubprogram(DISubprogram(Context.getNode())); + return processSubprogram(DISubprogram(Context)); } /// processSubprogram - Process DISubprogram. @@ -1267,7 +1293,7 @@ void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) { if (!DV.isVariable()) return; - if (!NodesSeen.insert(DV.getNode())) + if (!NodesSeen.insert(DV)) return; addCompileUnit(DIVariable(N).getCompileUnit()); @@ -1279,10 +1305,10 @@ bool DebugInfoFinder::addType(DIType DT) { if (!DT.isValid()) return false; - if (!NodesSeen.insert(DT.getNode())) + if (!NodesSeen.insert(DT)) return false; - TYs.push_back(DT.getNode()); + TYs.push_back(DT); return true; } @@ -1291,34 +1317,34 @@ bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) { if (!CU.Verify()) return false; - if (!NodesSeen.insert(CU.getNode())) + if (!NodesSeen.insert(CU)) return false; - CUs.push_back(CU.getNode()); + CUs.push_back(CU); return true; } /// addGlobalVariable - Add global variable into GVs. bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) { - if (!DIDescriptor(DIG.getNode()).isGlobalVariable()) + if (!DIDescriptor(DIG).isGlobalVariable()) return false; - if (!NodesSeen.insert(DIG.getNode())) + if (!NodesSeen.insert(DIG)) return false; - GVs.push_back(DIG.getNode()); + GVs.push_back(DIG); return true; } // addSubprogram - Add subprgoram into SPs. 
bool DebugInfoFinder::addSubprogram(DISubprogram SP) { - if (!DIDescriptor(SP.getNode()).isSubprogram()) + if (!DIDescriptor(SP).isSubprogram()) return false; - if (!NodesSeen.insert(SP.getNode())) + if (!NodesSeen.insert(SP)) return false; - SPs.push_back(SP.getNode()); + SPs.push_back(SP); return true; } @@ -1333,8 +1359,8 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) { DIDescriptor DIG(cast_or_null<MDNode>(NMD->getOperand(i))); if (!DIG.isGlobalVariable()) continue; - if (DIGlobalVariable(DIG.getNode()).getGlobal() == V) - return DIG.getNode(); + if (DIGlobalVariable(DIG).getGlobal() == V) + return DIG; } return 0; } @@ -1406,13 +1432,13 @@ bool llvm::getLocationInfo(const Value *V, std::string &DisplayName, } /// getDISubprogram - Find subprogram that is enclosing this scope. -DISubprogram llvm::getDISubprogram(MDNode *Scope) { +DISubprogram llvm::getDISubprogram(const MDNode *Scope) { DIDescriptor D(Scope); if (D.isSubprogram()) return DISubprogram(Scope); if (D.isLexicalBlock()) - return getDISubprogram(DILexicalBlock(Scope).getContext().getNode()); + return getDISubprogram(DILexicalBlock(Scope).getContext()); return DISubprogram(); } @@ -1420,10 +1446,10 @@ DISubprogram llvm::getDISubprogram(MDNode *Scope) { /// getDICompositeType - Find underlying composite type. DICompositeType llvm::getDICompositeType(DIType T) { if (T.isCompositeType()) - return DICompositeType(T.getNode()); + return DICompositeType(T); if (T.isDerivedType()) - return getDICompositeType(DIDerivedType(T.getNode()).getTypeDerivedFrom()); + return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom()); return DICompositeType(); } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 6271371..98dbb69 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -175,7 +175,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { // Each argument to a call takes on average one instruction to set up. NumInsts += CS.arg_size(); - ++NumCalls; + + // We don't want inline asm to count as a call - that would prevent loop + // unrolling. The argument setup cost is still real, though. + if (!isa<InlineAsm>(CS.getCalledValue())) + ++NumCalls; } } @@ -455,6 +459,11 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { else CallerMetrics.NumInsts = 0; - // We are not updating the argumentweights. We have already determined that + // We are not updating the argument weights. We have already determined that // Caller is a fairly large function, so we accept the loss of precision. } + +/// clear - empty the cache of inline costs +void InlineCostAnalyzer::clear() { + CachedFunctionInfo.clear(); +} diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 25d4f95..a031cbc 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -179,6 +179,7 @@ bool Lint::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); visit(F); dbgs() << MessagesStr.str(); + Messages.clear(); return false; } @@ -193,7 +194,6 @@ void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); - // TODO: Check function alignment? visitMemoryReference(I, Callee, 0, 0, MemRef::Callee); if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) { @@ -219,7 +219,15 @@ void Lint::visitCallSite(CallSite CS) { // TODO: Check sret attribute. } - // TODO: Check the "tail" keyword constraints. 
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) { + Value *Obj = (*AI)->getUnderlyingObject(); + Assert1(!isa<AllocaInst>(Obj) && !isa<VAArgInst>(Obj), + "Undefined behavior: Call with \"tail\" keyword references " + "alloca or va_arg", &I); + } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) switch (II->getIntrinsicID()) { @@ -280,8 +288,11 @@ void Lint::visitCallSite(CallSite CS) { break; case Intrinsic::stackrestore: + // Stackrestore doesn't read or write memory, but it sets the + // stack pointer, which the compiler may read from or write to + // at any time, so check it for both readability and writeability. visitMemoryReference(I, CS.getArgument(0), 0, 0, - MemRef::Read); + MemRef::Read | MemRef::Write); break; } } @@ -482,14 +493,10 @@ void llvm::lintFunction(const Function &f) { } /// lintModule - Check a module for errors, printing messages on stderr. -/// Return true if the module is corrupt. /// -void llvm::lintModule(const Module &M, std::string *ErrorInfo) { +void llvm::lintModule(const Module &M) { PassManager PM; Lint *V = new Lint(); PM.add(V); PM.run(const_cast<Module&>(M)); - - if (ErrorInfo) - *ErrorInfo = V->MessagesStr.str(); } diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp new file mode 100644 index 0000000..556d4c8 --- /dev/null +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -0,0 +1,86 @@ +//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass decodes the debug info metadata in a module and prints in a +// (sufficiently-prepared-) human-readable form. +// +// For example, run this pass from opt along with the -analyze option, and +// it'll print to standard output. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +namespace { + class ModuleDebugInfoPrinter : public ModulePass { + DebugInfoFinder Finder; + public: + static char ID; // Pass identification, replacement for typeid + ModuleDebugInfoPrinter() : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + virtual void print(raw_ostream &O, const Module *M) const; + }; +} + +char ModuleDebugInfoPrinter::ID = 0; +static RegisterPass<ModuleDebugInfoPrinter> +X("module-debuginfo", + "Decodes module-level debug info", false, true); + +ModulePass *llvm::createModuleDebugInfoPrinterPass() { + return new ModuleDebugInfoPrinter(); +} + +bool ModuleDebugInfoPrinter::runOnModule(Module &M) { + Finder.processModule(M); + return false; +} + +void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { + for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(), + E = Finder.compile_unit_end(); I != E; ++I) { + O << "Compile Unit: "; + DICompileUnit(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.subprogram_begin(), + E = Finder.subprogram_end(); I != E; ++I) { + O << "Subprogram: "; + DISubprogram(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.global_variable_begin(), + E = Finder.global_variable_end(); I != E; ++I) { + O << "GlobalVariable: "; + DIGlobalVariable(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.type_begin(), + E = Finder.type_end(); I != E; ++I) { + O << "Type: "; + DIType(*I).print(O); + O << '\n'; + } +} diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 46f3cbc..9b4370f 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -537,6 +537,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(coldcc); KEYWORD(x86_stdcallcc); KEYWORD(x86_fastcallcc); + KEYWORD(x86_thiscallcc); KEYWORD(arm_apcscc); KEYWORD(arm_aapcscc); KEYWORD(arm_aapcs_vfpcc); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 3b08ca1..226d8d3 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1074,6 +1074,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'coldcc' /// ::= 'x86_stdcallcc' /// ::= 'x86_fastcallcc' +/// ::= 'x86_thiscallcc' /// ::= 'arm_apcscc' /// ::= 'arm_aapcscc' /// ::= 'arm_aapcs_vfpcc' @@ -1088,6 +1089,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_coldcc: CC = CallingConv::Cold; break; case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break; case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break; + case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break; case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break; case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break; case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 3ac9169..5eed170 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -68,7 +68,7 @@ namespace lltok { kw_c, kw_cc, kw_ccc, kw_fastcc, kw_coldcc, - kw_x86_stdcallcc, 
kw_x86_fastcallcc, + kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, kw_msp430_intrcc, diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index ded4b3f..5a0c27b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -205,16 +205,10 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); - } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) { + } else if (MAI->getLinkOnceDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - // FIXME: linkonce should be a section attribute, handled by COFF Section - // assignment. - // http://sourceware.org/binutils/docs-2.20/as/Linkonce.html#Linkonce - // .linkonce discard - // FIXME: It would be nice to use .linkonce samesize for non-common - // globals. - OutStreamer.EmitRawText(StringRef(LinkOnce)); + //NOTE: linkonce is handled by the section the symbol was assigned to. } else { // .weak _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); @@ -247,6 +241,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (EmitSpecialLLVMGlobal(GV)) return; + if (isVerbose()) { + WriteAsOperand(OutStreamer.GetCommentOS(), GV, + /*PrintType=*/false, GV->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } + MCSymbol *GVSym = Mang->getSymbol(GV); EmitVisibility(GVSym, GV->getVisibility()); @@ -316,17 +316,57 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } + + // Handle thread local data for mach-o which requires us to output an + // additional structure of data and mangle the original symbol so that we + // can reference it later. + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { + // Emit the .tbss symbol + MCSymbol *MangSym = + OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); + + if (GVKind.isThreadBSS()) + OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); + else if (GVKind.isThreadData()) { + OutStreamer.SwitchSection(TheSection); + + EmitLinkage(GV->getLinkage(), MangSym); + EmitAlignment(AlignLog, GV); + OutStreamer.EmitLabel(MangSym); + + EmitGlobalConstant(GV->getInitializer()); + } + + OutStreamer.AddBlankLine(); + + // Emit the variable struct for the runtime. + const MCSection *TLVSect + = getObjFileLowering().getTLSExtraDataSection(); + + OutStreamer.SwitchSection(TLVSect); + // Emit the linkage here. 
+ EmitLinkage(GV->getLinkage(), GVSym); + OutStreamer.EmitLabel(GVSym); + + // Three pointers in size: + // - __tlv_bootstrap - used to make sure support exists + // - spare pointer, used when mapped by the runtime + // - pointer to mangled symbol above with initializer + unsigned PtrSize = TD->getPointerSizeInBits()/8; + OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__tlv_bootstrap"), + PtrSize, 0); + OutStreamer.EmitIntValue(0, PtrSize, 0); + OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); + + OutStreamer.AddBlankLine(); + return; + } OutStreamer.SwitchSection(TheSection); EmitLinkage(GV->getLinkage(), GVSym); EmitAlignment(AlignLog, GV); - if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, - /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; - } OutStreamer.EmitLabel(GVSym); EmitGlobalConstant(GV->getInitializer()); @@ -408,7 +448,13 @@ void AsmPrinter::EmitFunctionHeader() { /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the /// function. This can be overridden by targets as required to do custom stuff. void AsmPrinter::EmitFunctionEntryLabel() { - OutStreamer.EmitLabel(CurrentFnSym); + // The function label could have already been emitted if two symbols end up + // conflicting due to asm renaming. Detect this and emit an error. + if (CurrentFnSym->isUndefined()) + return OutStreamer.EmitLabel(CurrentFnSym); + + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); } @@ -503,7 +549,7 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // cast away const; DIetc do not take const operands for some reason. DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); if (V.getContext().isSubprogram()) - OS << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; + OS << DISubprogram(V.getContext()).getDisplayName() << ":"; OS << V.getName() << " <- "; // Register or immediate value. Register 0 means undef. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 37d10e5..ba6fed2 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -53,6 +53,17 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { } SourceMgr SrcMgr; + + // Ensure the buffer is newline terminated. + char *TmpString = 0; + if (Str.back() != '\n') { + TmpString = new char[Str.size() + 2]; + memcpy(TmpString, Str.data(), Str.size()); + TmpString[Str.size()] = '\n'; + TmpString[Str.size() + 1] = 0; + isNullTerminated = true; + Str = TmpString; + } // If the current LLVMContext has an inline asm handler, set it in SourceMgr. LLVMContext &LLVMCtx = MMI->getModule()->getContext(); @@ -84,6 +95,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { /*NoFinalize*/ true); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); + + if (TmpString) + delete[] TmpString; } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 9cb8314..d56c094 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -315,6 +315,10 @@ namespace llvm { /// virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + /// getValue - Get MCSymbol. + /// + const MCSymbol *getValue() const { return Label; } + /// SizeOf - Determine size of label value in bytes. 
/// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e9e9ba5..890507c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -47,6 +47,10 @@ static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); +static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, + cl::desc("Make an absense of debug location information explicit."), + cl::init(false)); + namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -78,12 +82,12 @@ class CompileUnit { /// GVToDieMap - Tracks the mapping of unit level debug informaton /// variables to debug information entries. /// FIXME : Rename GVToDieMap -> NodeToDieMap - DenseMap<MDNode *, DIE *> GVToDieMap; + DenseMap<const MDNode *, DIE *> GVToDieMap; /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton /// descriptors to debug information entries using a DIEEntry proxy. /// FIXME : Rename - DenseMap<MDNode *, DIEEntry *> GVToDIEEntryMap; + DenseMap<const MDNode *, DIEEntry *> GVToDIEEntryMap; /// Globals - A map of globally visible named entities for this unit. /// @@ -119,24 +123,24 @@ public: /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. - DIE *getDIE(MDNode *N) { return GVToDieMap.lookup(N); } + DIE *getDIE(const MDNode *N) { return GVToDieMap.lookup(N); } /// insertDIE - Insert DIE into the map. - void insertDIE(MDNode *N, DIE *D) { + void insertDIE(const MDNode *N, DIE *D) { GVToDieMap.insert(std::make_pair(N, D)); } /// getDIEEntry - Returns the debug information entry for the speciefied /// debug variable. - DIEEntry *getDIEEntry(MDNode *N) { - DenseMap<MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N); + DIEEntry *getDIEEntry(const MDNode *N) { + DenseMap<const MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N); if (I == GVToDIEEntryMap.end()) return NULL; return I->second; } /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(MDNode *N, DIEEntry *E) { + void insertDIEEntry(const MDNode *N, DIEEntry *E) { GVToDIEEntryMap.insert(std::make_pair(N, E)); } @@ -164,31 +168,18 @@ public: /// class DbgVariable { DIVariable Var; // Variable Descriptor. - unsigned FrameIndex; // Variable frame index. - const MachineInstr *DbgValueMInsn; // DBG_VALUE - // DbgValueLabel - DBG_VALUE is effective from this label. - MCSymbol *DbgValueLabel; - DbgVariable *const AbstractVar; // Abstract variable for this variable. - DIE *TheDIE; + DIE *TheDIE; // Variable DIE. + unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. public: // AbsVar may be NULL. - DbgVariable(DIVariable V, unsigned I, DbgVariable *AbsVar) - : Var(V), FrameIndex(I), DbgValueMInsn(0), - DbgValueLabel(0), AbstractVar(AbsVar), TheDIE(0) {} - DbgVariable(DIVariable V, const MachineInstr *MI, DbgVariable *AbsVar) - : Var(V), FrameIndex(0), DbgValueMInsn(MI), DbgValueLabel(0), - AbstractVar(AbsVar), TheDIE(0) - {} + DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} // Accessors. 
DIVariable getVariable() const { return Var; } - unsigned getFrameIndex() const { return FrameIndex; } - const MachineInstr *getDbgValue() const { return DbgValueMInsn; } - MCSymbol *getDbgValueLabel() const { return DbgValueLabel; } - void setDbgValueLabel(MCSymbol *L) { DbgValueLabel = L; } - DbgVariable *getAbstractVariable() const { return AbstractVar; } void setDIE(DIE *D) { TheDIE = D; } DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } }; //===----------------------------------------------------------------------===// @@ -204,7 +195,7 @@ class DbgScope { DbgScope *Parent; // Parent to this scope. DIDescriptor Desc; // Debug info descriptor for scope. // Location at which this scope is inlined. - AssertingVH<MDNode> InlinedAtLocation; + AssertingVH<const MDNode> InlinedAtLocation; bool AbstractScope; // Abstract Scope const MachineInstr *LastInsn; // Last instruction of this scope. const MachineInstr *FirstInsn; // First instruction of this scope. @@ -217,7 +208,7 @@ class DbgScope { // Private state for dump() mutable unsigned IndentLevel; public: - DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0) + DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0) : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0), IndentLevel(0) {} @@ -227,8 +218,8 @@ public: DbgScope *getParent() const { return Parent; } void setParent(DbgScope *P) { Parent = P; } DIDescriptor getDesc() const { return Desc; } - MDNode *getInlinedAt() const { return InlinedAtLocation; } - MDNode *getScopeNode() const { return Desc.getNode(); } + const MDNode *getInlinedAt() const { return InlinedAtLocation; } + const MDNode *getScopeNode() const { return Desc; } const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; } const SmallVector<DbgRange, 4> &getRanges() { return Ranges; } @@ -300,7 +291,7 @@ public: void DbgScope::dump() const { raw_ostream &err = dbgs(); err.indent(IndentLevel); - MDNode *N = Desc.getNode(); + const MDNode *N = Desc; N->dump(); if (AbstractScope) err << "Abstract Scope\n"; @@ -322,15 +313,15 @@ DbgScope::~DbgScope() { } DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), ModuleCU(0), + : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), CurrentFnDbgScope(0), PrevLabel(NULL) { NextStringPoolNumber = 0; DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; - DwarfDebugRangeSectionSym = 0; - FunctionBeginSym = 0; + DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; + FunctionBeginSym = FunctionEndSym = 0; if (TimePassesIsEnabled) { NamedRegionTimer T(DbgTimerName, DWARFGroupName); beginModule(M); @@ -444,8 +435,8 @@ void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, /// addSourceLine - Add location information to specified debug information /// entry. void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) { - // If there is no compile unit specified, don't add a line #. - if (!V->getCompileUnit().Verify()) + // Verify variable. + if (!V->Verify()) return; unsigned Line = V->getLineNumber(); @@ -458,9 +449,9 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) { /// addSourceLine - Add location information to specified debug information /// entry. 
-void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) { - // If there is no compile unit specified, don't add a line #. - if (!G->getCompileUnit().Verify()) +void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) { + // Verify global variable. + if (!G->Verify()) return; unsigned Line = G->getLineNumber(); @@ -474,8 +465,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) { /// addSourceLine - Add location information to specified debug information /// entry. void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) { - // If there is no compile unit specified, don't add a line #. - if (!SP->getCompileUnit().Verify()) + // Verify subprogram. + if (!SP->Verify()) return; // If the line number is 0, don't add it. if (SP->getLineNumber() == 0) @@ -494,9 +485,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) { /// addSourceLine - Add location information to specified debug information /// entry. void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) { - // If there is no compile unit specified, don't add a line #. - DICompileUnit CU = Ty->getCompileUnit(); - if (!CU.Verify()) + // Verify type. + if (!Ty->Verify()) return; unsigned Line = Ty->getLineNumber(); @@ -512,8 +502,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) { /// addSourceLine - Add location information to specified debug information /// entry. void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) { - // If there is no compile unit specified, don't add a line #. - if (!NS->getCompileUnit().Verify()) + // Verify namespace. + if (!NS->Verify()) return; unsigned Line = NS->getLineNumber(); @@ -560,16 +550,16 @@ DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) { unsigned tag = Ty.getTag(); if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty.getNode()); + DIDerivedType DTy = DIDerivedType(Ty); subType = DTy.getTypeDerivedFrom(); } - DICompositeType blockStruct = DICompositeType(subType.getNode()); + DICompositeType blockStruct = DICompositeType(subType); DIArray Elements = blockStruct.getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element.getNode()); + DIDerivedType DT = DIDerivedType(Element); if (Name == DT.getName()) return (DT.getTypeDerivedFrom()); } @@ -700,12 +690,12 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, StringRef varName = VD.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty.getNode()); + DIDerivedType DTy = DIDerivedType(Ty); TmpTy = DTy.getTypeDerivedFrom(); isPointer = true; } - DICompositeType blockStruct = DICompositeType(TmpTy.getNode()); + DICompositeType blockStruct = DICompositeType(TmpTy); // Find the __forwarding field and the variable field in the __Block_byref // struct. @@ -715,7 +705,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element.getNode()); + DIDerivedType DT = DIDerivedType(Element); StringRef fieldName = DT.getName(); if (fieldName == "__forwarding") forwardingField = Element; @@ -725,9 +715,9 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, // Get the offsets for the forwarding field and the variable field. 
unsigned forwardingFieldOffset = - DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3; + DIDerivedType(forwardingField).getOffsetInBits() >> 3; unsigned varFieldOffset = - DIDerivedType(varField.getNode()).getOffsetInBits() >> 3; + DIDerivedType(varField).getOffsetInBits() >> 3; // Decode the original location, and use that as the start of the byref // variable's location. @@ -813,7 +803,7 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, } /// addRegisterAddress - Add register location entry in variable DIE. -bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV, +bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO) { assert (MO.isReg() && "Invalid machine operand!"); if (!MO.getReg()) @@ -821,26 +811,26 @@ bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV, MachineLocation Location; Location.set(MO.getReg()); addAddress(Die, dwarf::DW_AT_location, Location); - if (MCSymbol *VS = DV->getDbgValueLabel()) + if (VS) addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } /// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, DbgVariable *DV, +bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO) { assert (MO.isImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); unsigned Imm = MO.getImm(); addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - if (MCSymbol *VS = DV->getDbgValueLabel()) + if (VS) addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } /// addConstantFPValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV, +bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO) { assert (MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -862,10 +852,8 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV, (unsigned char)0xFF & FltPtr[Start]); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - - if (MCSymbol *VS = DV->getDbgValueLabel()) - addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - VS); + if (VS) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } @@ -873,34 +861,35 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV, /// addToContextOwner - Add Die into the list of its context owner's children. void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { if (Context.isType()) { - DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode())); + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); ContextDIE->addChild(Die); } else if (Context.isNameSpace()) { - DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context.getNode())); + DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode())) + } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) ContextDIE->addChild(Die); else - ModuleCU->addDie(Die); + getCompileUnit(Context)->addDie(Die); } /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { - DIE *TyDIE = ModuleCU->getDIE(Ty.getNode()); + CompileUnit *TypeCU = getCompileUnit(Ty); + DIE *TyDIE = TypeCU->getDIE(Ty); if (TyDIE) return TyDIE; // Create new type. 
TyDIE = new DIE(dwarf::DW_TAG_base_type); - ModuleCU->insertDIE(Ty.getNode(), TyDIE); + TypeCU->insertDIE(Ty, TyDIE); if (Ty.isBasicType()) - constructTypeDIE(*TyDIE, DIBasicType(Ty.getNode())); + constructTypeDIE(*TyDIE, DIBasicType(Ty)); else if (Ty.isCompositeType()) - constructTypeDIE(*TyDIE, DICompositeType(Ty.getNode())); + constructTypeDIE(*TyDIE, DICompositeType(Ty)); else { assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(*TyDIE, DIDerivedType(Ty.getNode())); + constructTypeDIE(*TyDIE, DIDerivedType(Ty)); } addToContextOwner(TyDIE, Ty.getContext()); @@ -909,11 +898,12 @@ DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { /// addType - Add a new type attribute to the specified entity. void DwarfDebug::addType(DIE *Entity, DIType Ty) { - if (!Ty.isValid()) + if (!Ty.Verify()) return; // Check for pre-existence. - DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode()); + CompileUnit *TypeCU = getCompileUnit(Ty); + DIEEntry *Entry = TypeCU->getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); @@ -925,7 +915,7 @@ void DwarfDebug::addType(DIE *Entity, DIType Ty) { // Set up proxy. Entry = createDIEEntry(Buffer); - ModuleCU->insertDIEEntry(Ty.getNode(), Entry); + TypeCU->insertDIEEntry(Ty, Entry); Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); } @@ -994,9 +984,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add enumerators to enumeration type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i).getNode()); + DIDescriptor Enum(Elements.getElement(i)); if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum.getNode())); + ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); Buffer.addChild(ElemDie); } } @@ -1006,7 +996,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add return type. DIArray Elements = CTy.getTypeArray(); DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy.getNode())); + addType(&Buffer, DIType(RTy)); // Add prototype flag. 
addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); @@ -1015,7 +1005,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - addType(Arg, DIType(Ty.getNode())); + addType(Arg, DIType(Ty)); Buffer.addChild(Arg); } } @@ -1036,9 +1026,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIDescriptor Element = Elements.getElement(i); DIE *ElemDie = NULL; if (Element.isSubprogram()) - ElemDie = createSubprogramDIE(DISubprogram(Element.getNode())); + ElemDie = createSubprogramDIE(DISubprogram(Element)); else if (Element.isVariable()) { - DIVariable DV(Element.getNode()); + DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, DV.getName()); @@ -1047,7 +1037,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(ElemDie, &DV); } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element.getNode())); + ElemDie = createMemberDIE(DIDerivedType(Element)); else continue; Buffer.addChild(ElemDie); @@ -1062,9 +1052,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { dwarf::DW_FORM_data1, RLang); DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType.getNode()).isCompositeType()) + if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType.getNode()))); + getOrCreateTypeDIE(DIType(ContainingType))); break; } default: @@ -1120,22 +1110,23 @@ void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, DIArray Elements = CTy->getTypeArray(); // Get an anonymous type for index type. - DIE *IdxTy = ModuleCU->getIndexTyDie(); + CompileUnit *TheCU = getCompileUnit(*CTy); + DIE *IdxTy = TheCU->getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. IdxTy = new DIE(dwarf::DW_TAG_base_type); addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_signed); - ModuleCU->addDie(IdxTy); - ModuleCU->setIndexTyDie(IdxTy); + TheCU->addDie(IdxTy); + TheCU->setIndexTyDie(IdxTy); } // Add subranges to array type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IdxTy); + constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); } } @@ -1262,7 +1253,8 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { /// createSubprogramDIE - Create new DIE using SP. 
DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { - DIE *SPDie = ModuleCU->getDIE(SP.getNode()); + CompileUnit *SPCU = getCompileUnit(SP); + DIE *SPDie = SPCU->getDIE(SP); if (SPDie) return SPDie; @@ -1292,7 +1284,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) addType(SPDie, SPTy); else - addType(SPDie, DIType(Args.getElement(0).getNode())); + addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { @@ -1302,7 +1294,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); ContainingTypeMap.insert(std::make_pair(SPDie, - SP.getContainingType().getNode())); + SP.getContainingType())); } if (MakeDecl || !SP.isDefinition()) { @@ -1317,7 +1309,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i).getNode())); + DIType ATy = DIType(DIType(Args.getElement(i))); addType(Arg, ATy); if (ATy.isArtificial()) addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); @@ -1335,12 +1327,12 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); // DW_TAG_inlined_subroutine may refer to this DIE. - ModuleCU->insertDIE(SP.getNode(), SPDie); + SPCU->insertDIE(SP, SPDie); return SPDie; } -DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { +DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) { assert(N && "Invalid Scope encoding!"); DbgScope *AScope = AbstractScopes.lookup(N); @@ -1353,7 +1345,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { if (Scope.isLexicalBlock()) { DILexicalBlock DB(N); DIDescriptor ParentDesc = DB.getContext(); - Parent = getOrCreateAbstractScope(ParentDesc.getNode()); + Parent = getOrCreateAbstractScope(ParentDesc); } AScope = new DbgScope(Parent, DIDescriptor(N), NULL); @@ -1369,14 +1361,14 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { /// isSubprogramContext - Return true if Context is either a subprogram /// or another context nested inside a subprogram. -static bool isSubprogramContext(MDNode *Context) { +static bool isSubprogramContext(const MDNode *Context) { if (!Context) return false; DIDescriptor D(Context); if (D.isSubprogram()) return true; if (D.isType()) - return isSubprogramContext(DIType(Context).getContext().getNode()); + return isSubprogramContext(DIType(Context).getContext()); return false; } @@ -1384,8 +1376,9 @@ static bool isSubprogramContext(MDNode *Context) { /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert /// DIEs for these variables. 
-DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { - DIE *SPDie = ModuleCU->getDIE(SPNode); +DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { + CompileUnit *SPCU = getCompileUnit(SPNode); + DIE *SPDie = SPCU->getDIE(SPNode); assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); @@ -1396,7 +1389,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { // specification DIE for a function defined inside a function. if (SP.isDefinition() && !SP.getContext().isCompileUnit() && !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext().getNode())) { + !isSubprogramContext(SP.getContext())) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. @@ -1406,7 +1399,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i).getNode())); + DIType ATy = DIType(DIType(Args.getElement(i))); addType(Arg, ATy); if (ATy.isArtificial()) addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); @@ -1416,7 +1409,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { SPDie = new DIE(dwarf::DW_TAG_subprogram); addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, SPDeclDie); - ModuleCU->addDie(SPDie); + SPCU->addDie(SPDie); } addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, @@ -1451,18 +1444,18 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { - DebugRangeSymbols.push_back(LabelsBeforeInsn.lookup(RI->first)); - DebugRangeSymbols.push_back(LabelsAfterInsn.lookup(RI->second)); + DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); + DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); } DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); return ScopeDIE; } - MCSymbol *Start = LabelsBeforeInsn.lookup(RI->first); - MCSymbol *End = LabelsAfterInsn.lookup(RI->second); + const MCSymbol *Start = getLabelBeforeInsn(RI->first); + const MCSymbol *End = getLabelAfterInsn(RI->second); - if (Start == 0 || End == 0) return 0; + if (End == 0) return 0; assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); @@ -1487,10 +1480,10 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { // For now, use first instruction range and emit low_pc/high_pc pair and // corresponding .debug_inlined section entry for this pair. 
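constructLexicalScopeDIE above chooses between two DWARF encodings for a scope's addresses: several instruction ranges become a DW_AT_ranges offset into the .debug_range table, while a single contiguous range becomes a DW_AT_low_pc/DW_AT_high_pc label pair. A small sketch of that decision, assuming each range is a (start, end) pair (hypothetical helper, not the LLVM API):

#include <iostream>
#include <utility>
#include <vector>

// Each range is a (start, end) instruction-address pair.
const char *scopeAddressForm(const std::vector<std::pair<int, int>> &Ranges) {
  if (Ranges.empty())
    return "no address info";
  if (Ranges.size() > 1)
    return "DW_AT_ranges (offset into .debug_range)";
  return "DW_AT_low_pc / DW_AT_high_pc";  // one contiguous range
}

int main() {
  std::cout << scopeAddressForm({{0, 8}}) << "\n";
  std::cout << scopeAddressForm({{0, 8}, {24, 32}}) << "\n";
}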
SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); - MCSymbol *StartLabel = LabelsBeforeInsn.lookup(RI->first); - MCSymbol *EndLabel = LabelsAfterInsn.lookup(RI->second); + const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); - if (StartLabel == 0 || EndLabel == 0) { + if (StartLabel == FunctionBeginSym || EndLabel == 0) { assert (0 && "Unexpected Start and End labels for a inlined scope!"); return 0; } @@ -1504,8 +1497,9 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { DIScope DS(Scope->getScopeNode()); DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - DISubprogram InlinedSP = getDISubprogram(DS.getNode()); - DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode()); + DISubprogram InlinedSP = getDISubprogram(DS); + CompileUnit *TheCU = getCompileUnit(InlinedSP); + DIE *OriginDIE = TheCU->getDIE(InlinedSP); assert(OriginDIE && "Unable to find Origin DIE!"); addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, OriginDIE); @@ -1516,18 +1510,18 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { InlinedSubprogramDIEs.insert(OriginDIE); // Track the start label for this inlined function. - DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator - I = InlineInfo.find(InlinedSP.getNode()); + DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator + I = InlineInfo.find(InlinedSP); if (I == InlineInfo.end()) { - InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartLabel, + InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); - InlinedSPNodes.push_back(InlinedSP.getNode()); + InlinedSPNodes.push_back(InlinedSP); } else I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); + addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); return ScopeDIE; @@ -1560,22 +1554,15 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Define variable debug information entry. DIE *VariableDie = new DIE(Tag); - DIE *AbsDIE = NULL; - if (DbgVariable *AV = DV->getAbstractVariable()) - AbsDIE = AV->getDIE(); - - if (AbsDIE) { - DIScope DS(Scope->getScopeNode()); - DISubprogram InlinedSP = getDISubprogram(DS.getNode()); - DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode()); - (void) OriginSPDIE; - assert(OriginSPDIE && "Unable to find Origin DIE for the SP!"); - DIE *AbsDIE = DV->getAbstractVariable()->getDIE(); - assert(AbsDIE && "Unable to find Origin DIE for the Variable!"); + DenseMap<const DbgVariable *, const DbgVariable *>::iterator + V2AVI = VarToAbstractVarMap.find(DV); + if (V2AVI != VarToAbstractVarMap.end()) + AbsDIE = V2AVI->second->getDIE(); + + if (AbsDIE) addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsDIE); - } else { addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); addSourceLine(VariableDie, &VD); @@ -1589,55 +1576,76 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { addType(VariableDie, VD.getType()); } + if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial()) + addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + + if (Scope->isAbstractScope()) { + DV->setDIE(VariableDie); + return VariableDie; + } + // Add variable address. 
- if (!Scope->isAbstractScope()) { - // Check if variable is described by DBG_VALUE instruction. - if (const MachineInstr *DVInsn = DV->getDbgValue()) { - bool updated = false; - // FIXME : Handle getNumOperands != 3 - if (DVInsn->getNumOperands() == 3) { - if (DVInsn->getOperand(0).isReg()) - updated = addRegisterAddress(VariableDie, DV, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isImm()) - updated = addConstantValue(VariableDie, DV, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isFPImm()) - updated = addConstantFPValue(VariableDie, DV, DVInsn->getOperand(0)); - } else { - MachineLocation Location = Asm->getDebugValueLocation(DVInsn); - if (Location.getReg()) { - addAddress(VariableDie, dwarf::DW_AT_location, Location); - if (MCSymbol *VS = DV->getDbgValueLabel()) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - VS); - updated = true; - } - } - if (!updated) { - // If variableDie is not updated then DBG_VALUE instruction does not - // have valid variable info. - delete VariableDie; - return NULL; - } - } - else { - MachineLocation Location; - unsigned FrameReg; - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(), - FrameReg); - Location.set(FrameReg, Offset); - - if (VD.hasComplexAddress()) - addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else if (VD.isBlockByrefVariable()) - addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else + + unsigned Offset = DV->getDotDebugLocOffset(); + if (Offset != ~0U) { + addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + Asm->GetTempSymbol("debug_loc", Offset)); + DV->setDIE(VariableDie); + UseDotDebugLocEntry.insert(VariableDie); + return VariableDie; + } + + // Check if variable is described by a DBG_VALUE instruction. + DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI = + DbgVariableToDbgInstMap.find(DV); + if (DVI != DbgVariableToDbgInstMap.end()) { + const MachineInstr *DVInsn = DVI->second; + const MCSymbol *DVLabel = findVariableLabel(DV); + bool updated = false; + // FIXME : Handle getNumOperands != 3 + if (DVInsn->getNumOperands() == 3) { + if (DVInsn->getOperand(0).isReg()) + updated = addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isImm()) + updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isFPImm()) + updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + } else { + MachineLocation Location = Asm->getDebugValueLocation(DVInsn); + if (Location.getReg()) { addAddress(VariableDie, dwarf::DW_AT_location, Location); + if (DVLabel) + addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, + DVLabel); + updated = true; + } } - } + if (!updated) { + // If variableDie is not updated then DBG_VALUE instruction does not + // have valid variable info. + delete VariableDie; + return NULL; + } + DV->setDIE(VariableDie); + return VariableDie; + } - if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + // .. else use frame index, if available. 
+ MachineLocation Location; + unsigned FrameReg; + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + int FI = 0; + if (findVariableFrameIndex(DV, &FI)) { + int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + Location.set(FrameReg, Offset); + + if (VD.hasComplexAddress()) + addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + addAddress(VariableDie, dwarf::DW_AT_location, Location); + } DV->setDIE(VariableDie); return VariableDie; @@ -1651,14 +1659,15 @@ void DwarfDebug::addPubTypes(DISubprogram SP) { DIArray Args = SPTy.getTypeArray(); for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { - DIType ATy(Args.getElement(i).getNode()); - if (!ATy.isValid()) + DIType ATy(Args.getElement(i)); + if (!ATy.Verify()) continue; DICompositeType CATy = getDICompositeType(ATy); - if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty() + if (DIDescriptor(CATy).Verify() && !CATy.getName().empty() && !CATy.isForwardDecl()) { - if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode())) - ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry()); + CompileUnit *TheCU = getCompileUnit(CATy); + if (DIEEntry *Entry = TheCU->getDIEEntry(CATy)) + TheCU->addGlobalType(CATy.getName(), Entry->getEntry()); } } } @@ -1674,9 +1683,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { ScopeDIE = constructInlinedScopeDIE(Scope); else if (DS.isSubprogram()) { if (Scope->isAbstractScope()) - ScopeDIE = ModuleCU->getDIE(DS.getNode()); + ScopeDIE = getCompileUnit(DS)->getDIE(DS); else - ScopeDIE = updateSubprogramScopeDIE(DS.getNode()); + ScopeDIE = updateSubprogramScopeDIE(DS); } else ScopeDIE = constructLexicalScopeDIE(Scope); @@ -1700,7 +1709,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { } if (DS.isSubprogram()) - addPubTypes(DISubprogram(DS.getNode())); + addPubTypes(DISubprogram(DS)); return ScopeDIE; } @@ -1744,11 +1753,12 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){ /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { - DIE *NDie = ModuleCU->getDIE(NS.getNode()); + CompileUnit *TheCU = getCompileUnit(NS); + DIE *NDie = TheCU->getDIE(NS); if (NDie) return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); - ModuleCU->insertDIE(NS.getNode(), NDie); + TheCU->insertDIE(NS, NDie); if (!NS.getName().empty()) addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); addSourceLine(NDie, &NS); @@ -1756,12 +1766,10 @@ DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { return NDie; } -void DwarfDebug::constructCompileUnit(MDNode *N) { +/// constructCompileUnit - Create new CompileUnit for the given +/// metadata node with tag DW_TAG_compile_unit. +void DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); - // Use first compile unit marked as isMain as the compile unit for this - // module. 
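The restructured constructVariableDIE above consults location sources in a fixed order: a recorded .debug_loc offset wins, then a single describing DBG_VALUE instruction, then a frame index recorded in the MMI side table. A minimal sketch of that precedence with hypothetical types (not the LLVM API):

#include <iostream>
#include <optional>

struct VarSketch {
  std::optional<unsigned> DotDebugLocOffset;  // set when several DBG_VALUE
                                              // ranges were recorded
  bool HasDbgValueInst = false;               // one DBG_VALUE describes it
  std::optional<int> FrameIndex;              // MMI side-table stack slot
};

const char *pickLocationForm(const VarSketch &V) {
  if (V.DotDebugLocOffset)
    return "DW_FORM_data4 offset into .debug_loc";
  if (V.HasDbgValueInst)
    return "register/immediate taken from the DBG_VALUE";
  if (V.FrameIndex)
    return "frame-index location expression";
  return "no location emitted";
}

int main() {
  VarSketch V;
  V.FrameIndex = 2;
  std::cout << pickLocationForm(V) << "\n";  // frame-index location expression
}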
- if (ModuleCU || !DIUnit.isMain())
- return;
 StringRef FN = DIUnit.getFilename();
 StringRef Dir = DIUnit.getDirectory();
 unsigned ID = GetOrCreateSourceID(Dir, FN);
@@ -1794,12 +1802,44 @@ void DwarfDebug::constructCompileUnit(MDNode *N) {
 addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
 dwarf::DW_FORM_data1, RVer);
- assert(!ModuleCU &&
- "ModuleCU assigned since the top of constructCompileUnit");
- ModuleCU = new CompileUnit(ID, Die);
+ CompileUnit *NewCU = new CompileUnit(ID, Die);
+ if (!FirstCU)
+ FirstCU = NewCU;
+ CUMap.insert(std::make_pair(N, NewCU));
+}
+
+/// getCompileUnit - Get the CompileUnit for the given MDNode.
+CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
+ assert (N && "Invalid DwarfDebug::getCompileUnit argument!");
+ DIDescriptor D(N);
+ const MDNode *CUNode = NULL;
+ if (D.isCompileUnit())
+ CUNode = N;
+ else if (D.isSubprogram())
+ CUNode = DISubprogram(N).getCompileUnit();
+ else if (D.isType())
+ CUNode = DIType(N).getCompileUnit();
+ else if (D.isGlobalVariable())
+ CUNode = DIGlobalVariable(N).getCompileUnit();
+ else if (D.isVariable())
+ CUNode = DIVariable(N).getCompileUnit();
+ else if (D.isNameSpace())
+ CUNode = DINameSpace(N).getCompileUnit();
+ else if (D.isFile())
+ CUNode = DIFile(N).getCompileUnit();
+ else
+ return FirstCU;
+
+ DenseMap<const MDNode *, CompileUnit *>::const_iterator I
+ = CUMap.find(CUNode);
+ if (I == CUMap.end())
+ return FirstCU;
+ return I->second;
}
-void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
+
+/// constructGlobalVariableDIE - Construct global variable DIE.
+void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
 DIGlobalVariable DI_GV(N);
 // If debug information is malformed then ignore it.
@@ -1807,7 +1847,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
 return;
 // Check for pre-existence.
- if (ModuleCU->getDIE(DI_GV.getNode()))
+ CompileUnit *TheCU = getCompileUnit(N);
+ if (TheCU->getDIE(DI_GV))
 return;
 DIE *VariableDie = createGlobalVariableDIE(DI_GV);
@@ -1815,7 +1856,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
 return;
 // Add to map.
- ModuleCU->insertDIE(N, VariableDie);
+ TheCU->insertDIE(N, VariableDie);
 // Add to context owner.
 DIDescriptor GVContext = DI_GV.getContext();
@@ -1823,7 +1864,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
 // or a subprogram.
 if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
 !GVContext.isFile() &&
- !isSubprogramContext(GVContext.getNode())) {
+ !isSubprogramContext(GVContext)) {
 // Create specification DIE.
 DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
 addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
@@ -1834,7 +1875,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
 Asm->Mang->getSymbol(DI_GV.getGlobal()));
 addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
 addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- ModuleCU->addDie(VariableSpecDIE);
+ TheCU->addDie(VariableSpecDIE);
 } else {
 DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
 addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
@@ -1845,23 +1886,25 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
 addToContextOwner(VariableDie, GVContext);
 // Expose as global. FIXME - need to check external flag.
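The new getCompileUnit helper above resolves any debug-info node to its owning compile unit through CUMap and falls back to FirstCU when the owner cannot be determined. A standalone sketch with hypothetical stand-ins for MDNode and CompileUnit (not the LLVM API):

#include <cassert>
#include <unordered_map>

struct CU {};                               // stand-in for CompileUnit
struct Node { const Node *OwningCUNode; };  // stand-in for an MDNode

struct DwarfDebugSketch {
  CU *FirstCU = nullptr;
  std::unordered_map<const Node *, CU *> CUMap;

  // Resolve a node to its compile unit; unknown nodes fall back to FirstCU,
  // matching the defensive behavior of getCompileUnit above.
  CU *getCompileUnit(const Node *N) const {
    assert(N && "Invalid argument!");
    const Node *CUNode = N->OwningCUNode ? N->OwningCUNode : N;
    auto I = CUMap.find(CUNode);
    return I == CUMap.end() ? FirstCU : I->second;
  }
};

int main() {
  DwarfDebugSketch DD;
  CU Unit;
  Node CUNode{nullptr};  // a compile-unit node owns itself
  Node Fn{&CUNode};      // e.g. a subprogram pointing at its unit
  DD.FirstCU = &Unit;
  DD.CUMap[&CUNode] = &Unit;
  assert(DD.getCompileUnit(&Fn) == &Unit);
}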
- ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
+ TheCU->addGlobal(DI_GV.getName(), VariableDie);
 DIType GTy = DI_GV.getType();
 if (GTy.isCompositeType() && !GTy.getName().empty()
 && !GTy.isForwardDecl()) {
- DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
+ DIEEntry *Entry = TheCU->getDIEEntry(GTy);
 assert(Entry && "Missing global type!");
- ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
+ TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
 }
 return;
}
-void DwarfDebug::constructSubprogramDIE(MDNode *N) {
+/// constructSubprogramDIE - Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
 DISubprogram SP(N);
 // Check for pre-existence.
- if (ModuleCU->getDIE(N))
+ CompileUnit *TheCU = getCompileUnit(N);
+ if (TheCU->getDIE(N))
 return;
 if (!SP.isDefinition())
@@ -1872,13 +1915,13 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) {
 DIE *SubprogramDie = createSubprogramDIE(SP);
 // Add to map.
- ModuleCU->insertDIE(N, SubprogramDie);
+ TheCU->insertDIE(N, SubprogramDie);
 // Add to context owner.
 addToContextOwner(SubprogramDie, SP.getContext());
 // Expose as global.
- ModuleCU->addGlobal(SP.getName(), SubprogramDie);
+ TheCU->addGlobal(SP.getName(), SubprogramDie);
 return;
}
@@ -1952,7 +1995,7 @@ void DwarfDebug::beginModule(Module *M) {
 /// endModule - Emit all Dwarf sections that should come after the content.
 ///
 void DwarfDebug::endModule() {
- if (!ModuleCU) return;
+ if (!FirstCU) return;
 // Attach DW_AT_inline attribute with inlined subprogram DIEs.
 for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
 AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
 DIE *ISP = *AI;
 addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
 }
- for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
 CE = ContainingTypeMap.end(); CI != CE; ++CI) {
 DIE *SPDie = CI->first;
- MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+ const MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
 if (!N) continue;
- DIE *NDie = ModuleCU->getDIE(N);
+ DIE *NDie = getCompileUnit(N)->getDIE(N);
 if (!NDie) continue;
 addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
 }
@@ -2027,68 +2070,48 @@ void DwarfDebug::endModule() {
 // Emit info into a debug str section.
 emitDebugStr();
- delete ModuleCU;
- ModuleCU = NULL; // Reset for the next Module, if any.
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I)
+ delete I->second;
+ FirstCU = NULL; // Reset for the next Module, if any.
}
/// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, - unsigned FrameIdx, +DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, DebugLoc ScopeLoc) { - DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode()); + DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var); if (AbsDbgVariable) return AbsDbgVariable; - LLVMContext &Ctx = Var.getNode()->getContext(); + LLVMContext &Ctx = Var->getContext(); DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx)); if (!Scope) return NULL; - AbsDbgVariable = new DbgVariable(Var, FrameIdx, - NULL /* No more-abstract variable*/); + AbsDbgVariable = new DbgVariable(Var); Scope->addVariable(AbsDbgVariable); - AbstractVariables[Var.getNode()] = AbsDbgVariable; + AbstractVariables[Var] = AbsDbgVariable; return AbsDbgVariable; } -/// findAbstractVariable - Find abstract variable, if any, associated with Var. -/// FIXME : Refactor findAbstractVariable. -DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, - const MachineInstr *MI, - DebugLoc ScopeLoc) { - - DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode()); - if (AbsDbgVariable) - return AbsDbgVariable; - - LLVMContext &Ctx = Var.getNode()->getContext(); - DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx)); - if (!Scope) - return NULL; - - AbsDbgVariable = new DbgVariable(Var, MI, - NULL /* No more-abstract variable*/); - Scope->addVariable(AbsDbgVariable); - AbstractVariables[Var.getNode()] = AbsDbgVariable; - DbgValueStartMap[MI] = AbsDbgVariable; - return AbsDbgVariable; -} - -/// collectVariableInfo - Populate DbgScope entries with variables' info. -void DwarfDebug::collectVariableInfo() { +/// collectVariableInfoFromMMITable - Collect variable information from +/// side table maintained by MMI. +void +DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, + SmallPtrSet<const MDNode *, 16> &Processed) { const LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); - MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) { - MDNode *Var = VI->first; + const MDNode *Var = VI->first; if (!Var) continue; + Processed.insert(Var); DIVariable DV(Var); const std::pair<unsigned, DebugLoc> &VP = VI->second; DbgScope *Scope = 0; - if (MDNode *IA = VP.second.getInlinedAt(Ctx)) + if (const MDNode *IA = VP.second.getInlinedAt(Ctx)) Scope = ConcreteScopes.lookup(IA); if (Scope == 0) Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx)); @@ -2097,100 +2120,192 @@ void DwarfDebug::collectVariableInfo() { if (Scope == 0) continue; - DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, VP.second); - DbgVariable *RegVar = new DbgVariable(DV, VP.first, AbsDbgVariable); + DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); + DbgVariable *RegVar = new DbgVariable(DV); + recordVariableFrameIndex(RegVar, VP.first); Scope->addVariable(RegVar); + if (AbsDbgVariable) { + recordVariableFrameIndex(AbsDbgVariable, VP.first); + VarToAbstractVarMap[RegVar] = AbsDbgVariable; + } } +} +/// isDbgValueInUndefinedReg - Return true if debug value, encoded by +/// DBG_VALUE instruction, is in undefined reg. 
+static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
+ assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ if (MI->getOperand(0).isReg() && !MI->getOperand(0).getReg())
+ return true;
+ return false;
+}
+
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by
+/// DBG_VALUE instruction, is in a defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+ assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ return true;
+ return false;
+}
+
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
+ SmallPtrSet<const MDNode *, 16> Processed;
+
+ // Collect info from the MMI side table.
+ collectVariableInfoFromMMITable(MF, Processed);
+
+ SmallVector<const MachineInstr *, 8> DbgValues;
 // Collect variable information from DBG_VALUE machine instructions;
 for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
- I != E; ++I) {
+ I != E; ++I)
 for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
 II != IE; ++II) {
 const MachineInstr *MInsn = II;
- if (!MInsn->isDebugValue())
+ if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn))
 continue;
+ DbgValues.push_back(MInsn);
+ }
- // Ignore Undef values.
- if (MInsn->getOperand(0).isReg() && !MInsn->getOperand(0).getReg())
- continue;
+ // This is a collection of DBG_VALUE instructions describing the same variable.
+ SmallVector<const MachineInstr *, 4> MultipleValues;
+ for(SmallVector<const MachineInstr *, 8>::iterator I = DbgValues.begin(),
+ E = DbgValues.end(); I != E; ++I) {
+ const MachineInstr *MInsn = *I;
+ MultipleValues.clear();
+ if (isDbgValueInDefinedReg(MInsn))
+ MultipleValues.push_back(MInsn);
+ DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata());
+ if (Processed.count(DV) != 0)
+ continue;
+
+ for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
+ ME = DbgValues.end(); MI != ME; ++MI) {
+ const MDNode *Var =
+ (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
+ if (Var == DV && isDbgValueInDefinedReg(*MI))
+ MultipleValues.push_back(*MI);
+ }
+
+ DbgScope *Scope = findDbgScope(MInsn);
+ if (!Scope && DV.getTag() == dwarf::DW_TAG_arg_variable)
+ Scope = CurrentFnDbgScope;
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
- DIVariable DV(
- const_cast<MDNode *>(MInsn->getOperand(MInsn->getNumOperands() - 1)
- .getMetadata()));
- if (DV.getTag() == dwarf::DW_TAG_arg_variable) {
- // FIXME Handle inlined subroutine arguments.
- DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL);
- CurrentFnDbgScope->addVariable(ArgVar);
- DbgValueStartMap[MInsn] = ArgVar;
+ Processed.insert(DV);
+ DbgVariable *RegVar = new DbgVariable(DV);
+ Scope->addVariable(RegVar);
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
+ if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
+ DbgVariableToDbgInstMap[AbsVar] = MInsn;
+ VarToAbstractVarMap[RegVar] = AbsVar;
+ }
+ if (MultipleValues.size() <= 1) {
+ DbgVariableToDbgInstMap[RegVar] = MInsn;
+ continue;
+ }
+
+ // Handle multiple DBG_VALUE instructions describing one variable.
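The loop below turns such a run of DBG_VALUE instructions into .debug_loc entries: each value is live from the label before its own instruction up to the label before the next DBG_VALUE, and the final value stays live until the function end symbol. A sketch of those range semantics with hypothetical types (not the LLVM API):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct LocEntry { std::string Begin, End; unsigned Reg; };

// Values holds (label-before-insn, register) pairs in program order.
std::vector<LocEntry>
buildRanges(const std::vector<std::pair<std::string, unsigned>> &Values,
            const std::string &FuncEndLabel) {
  std::vector<LocEntry> Out;
  for (size_t i = 0; i < Values.size(); ++i) {
    // Each value lives until the next DBG_VALUE; the last one lives until
    // the end of the function.
    const std::string &End =
        (i + 1 < Values.size()) ? Values[i + 1].first : FuncEndLabel;
    Out.push_back({Values[i].first, End, Values[i].second});
  }
  return Out;
}

int main() {
  for (const LocEntry &E : buildRanges({{"Ltmp1", 5}, {"Ltmp7", 6}},
                                       "Lfunc_end0"))
    std::cout << E.Begin << " .. " << E.End << " in reg " << E.Reg << "\n";
}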
+ if (DotDebugLocEntries.empty())
+ RegVar->setDotDebugLocOffset(0);
+ else
+ RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+ const MachineInstr *Begin = NULL;
+ const MachineInstr *End = NULL;
+ for (SmallVector<const MachineInstr *, 4>::iterator
+ MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) {
+ if (!Begin) {
+ Begin = *MVI; continue;
+ }
+ End = *MVI;
+ MachineLocation MLoc;
+ MLoc.set(Begin->getOperand(0).getReg(), 0);
+ const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+ const MCSymbol *SLabel = getLabelBeforeInsn(End);
+ DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+ Begin = End;
+ if (MVI + 1 == MVE) {
+ // If End is the last instruction then its value is valid
+ // until the end of the function.
+ MLoc.set(End->getOperand(0).getReg(), 0);
+ DotDebugLocEntries.
+ push_back(DotDebugLocEntry(SLabel, FunctionEndSym, MLoc));
+ }
+ }
+ DotDebugLocEntries.push_back(DotDebugLocEntry());
+ }
- DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown()) continue;
- DbgScope *Scope = 0;
- if (MDNode *IA = DL.getInlinedAt(Ctx))
- Scope = ConcreteScopes.lookup(IA);
- if (Scope == 0)
- Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-
- // If variable scope is not found then skip this variable.
- if (Scope == 0)
+ // Collect info for variables that were optimized out.
+ if (NamedMDNode *NMD =
+ MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+ if (!DV || !Processed.insert(DV))
 continue;
-
- DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, DL);
- DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable);
- DbgValueStartMap[MInsn] = RegVar;
- Scope->addVariable(RegVar);
+ DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
+ if (Scope)
+ Scope->addVariable(new DbgVariable(DV));
 }
 }
}
+/// getLabelBeforeInsn - Return Label preceding the instruction.
+const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsBeforeInsn.find(MI);
+ if (I == LabelsBeforeInsn.end())
+ // FunctionBeginSym always precedes all the instructions in the current function.
+ return FunctionBeginSym;
+ return I->second;
+}
+
+/// getLabelAfterInsn - Return Label immediately following the instruction.
+const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsAfterInsn.find(MI);
+ if (I == LabelsAfterInsn.end())
+ return NULL;
+ return I->second;
+}
+
 /// beginScope - Process beginning of a scope.
 void DwarfDebug::beginScope(const MachineInstr *MI) {
- // Check location.
- DebugLoc DL = MI->getDebugLoc();
- if (DL.isUnknown())
+ if (InsnNeedsLabel.count(MI) == 0) {
+ LabelsBeforeInsn[MI] = PrevLabel;
 return;
+ }
- MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
-
- // FIXME: Should only verify each scope once!
- if (!DIScope(Scope).Verify())
+ // Check location.
+ DebugLoc DL = MI->getDebugLoc();
+ if (!DL.isUnknown()) {
+ const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+ PrevInstLoc = DL;
+ LabelsBeforeInsn[MI] = PrevLabel;
 return;
+ }
- // DBG_VALUE instruction establishes new value.
+ // If location is unknown then use temp label for this DBG_VALUE
+ // instruction.
if (MI->isDebugValue()) { - DenseMap<const MachineInstr *, DbgVariable *>::iterator DI - = DbgValueStartMap.find(MI); - if (DI != DbgValueStartMap.end()) { - MCSymbol *Label = NULL; - if (DL == PrevInstLoc) - Label = PrevLabel; - else { - Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; - PrevLabel = Label; - } - DI->second->setDbgValueLabel(Label); - } + PrevLabel = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(PrevLabel); + LabelsBeforeInsn[MI] = PrevLabel; return; } - // Emit a label to indicate location change. This is used for line - // table even if this instruction does not start a new scope. - MCSymbol *Label = NULL; - if (DL == PrevInstLoc) - Label = PrevLabel; - else { - Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; - PrevLabel = Label; + if (UnknownLocations) { + PrevLabel = recordSourceLine(0, 0, 0); + LabelsBeforeInsn[MI] = PrevLabel; + return; } - // If this instruction begins a scope then note down corresponding label. - if (InsnsBeginScopeSet.count(MI) != 0) - LabelsBeforeInsn[MI] = Label; + assert (0 && "Instruction is not processed!"); } /// endScope - Process end of a scope. @@ -2204,7 +2319,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) { } /// getOrCreateDbgScope - Create DbgScope for the scope. -DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) { +DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) { if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); if (WScope) @@ -2213,7 +2328,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) { DbgScopeMap.insert(std::make_pair(Scope, WScope)); if (DIDescriptor(Scope).isLexicalBlock()) { DbgScope *Parent = - getOrCreateDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); + getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL); WScope->setParent(Parent); Parent->addScope(WScope); } @@ -2235,7 +2350,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) { DbgScopeMap.insert(std::make_pair(InlinedAt, WScope)); DILocation DL(InlinedAt); DbgScope *Parent = - getOrCreateDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); + getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation()); WScope->setParent(Parent); Parent->addScope(WScope); @@ -2249,13 +2364,13 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) { /// machine instruction encodes valid location info. static bool hasValidLocation(LLVMContext &Ctx, const MachineInstr *MInsn, - MDNode *&Scope, MDNode *&InlinedAt) { + const MDNode *&Scope, const MDNode *&InlinedAt) { if (MInsn->isDebugValue()) return false; DebugLoc DL = MInsn->getDebugLoc(); if (DL.isUnknown()) return false; - MDNode *S = DL.getScope(Ctx); + const MDNode *S = DL.getScope(Ctx); // There is no need to create another DIE for compile unit. For all // other scopes, create one DbgScope now. This will be translated @@ -2307,8 +2422,8 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF, for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; - MDNode *Scope = NULL; - MDNode *InlinedAt = NULL; + const MDNode *Scope = NULL; + const MDNode *InlinedAt = NULL; // Check if instruction has valid location information. 
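The rewritten beginScope above emits a fresh label only for instructions that beginFunction pre-marked in InsnNeedsLabel; every other instruction simply records the previous label, so a run of instructions at one source location shares a single label. A sketch of that policy with hypothetical types (not the LLVM API):

#include <iostream>
#include <set>
#include <string>

struct LabelerSketch {
  std::set<int> InsnNeedsLabel;  // instruction ids marked up front
  std::string PrevLabel = "Lfunc_begin0";
  int NextTmp = 0;

  // Mirrors beginScope: emit a new label only for pre-marked instructions,
  // otherwise record the previous label for this instruction.
  std::string labelBefore(int InsnId) {
    if (InsnNeedsLabel.count(InsnId))
      PrevLabel = "Ltmp" + std::to_string(NextTmp++);
    return PrevLabel;
  }
};

int main() {
  LabelerSketch L;
  L.InsnNeedsLabel = {0, 2};
  for (int i = 0; i < 4; ++i)
    std::cout << "insn " << i << " -> " << L.labelBefore(i) << "\n";
}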
 if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
@@ -2344,8 +2459,8 @@ bool DwarfDebug::extractScopeInformation() {
 LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
 SmallVector<DbgRange, 4> MIRanges;
 DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
- MDNode *PrevScope = NULL;
- MDNode *PrevInlinedAt = NULL;
+ const MDNode *PrevScope = NULL;
+ const MDNode *PrevInlinedAt = NULL;
 const MachineInstr *RangeBeginMI = NULL;
 const MachineInstr *PrevMI = NULL;
 for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
@@ -2353,8 +2468,8 @@ bool DwarfDebug::extractScopeInformation() {
 for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
 II != IE; ++II) {
 const MachineInstr *MInsn = II;
- MDNode *Scope = NULL;
- MDNode *InlinedAt = NULL;
+ const MDNode *Scope = NULL;
+ const MDNode *InlinedAt = NULL;
 // Check if instruction has valid location information.
 if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
@@ -2476,8 +2591,6 @@ static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
 if (!MMI->hasDebugInfo()) return;
 if (!extractScopeInformation()) return;
-
- collectVariableInfo();
 FunctionBeginSym = Asm->GetTempSymbol("func_begin",
 Asm->getFunctionNumber());
@@ -2489,7 +2602,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
 DebugLoc FDL = FindFirstDebugLoc(MF);
 if (FDL.isUnknown()) return;
- MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
+ const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
 DISubprogram SP = getDISubprogram(Scope);
 unsigned Line, Col;
@@ -2502,6 +2615,40 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
 }
 recordSourceLine(Line, Col, Scope);
+
+ DebugLoc PrevLoc;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+ DebugLoc DL = MI->getDebugLoc();
+ if (MI->isDebugValue()) {
+ // DBG_VALUE needs a label if the variable is a local variable or
+ // an argument whose location is changing.
+ assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
+ DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
+ if (!DV.Verify()) continue;
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ InsnNeedsLabel.insert(MI);
+ else if (!ProcessedArgs.insert(DV))
+ InsnNeedsLabel.insert(MI);
+ } else {
+ // If the location is unknown then the instruction needs a label only
+ // if the UnknownLocations flag is set.
+ if (DL.isUnknown()) {
+ if (UnknownLocations && !PrevLoc.isUnknown())
+ InsnNeedsLabel.insert(MI);
+ } else if (DL != PrevLoc)
+ // Otherwise, the instruction needs a label only if it is at a new location.
+ InsnNeedsLabel.insert(MI);
+ }
+
+ if (!DL.isUnknown() || UnknownLocations)
+ PrevLoc = DL;
+ }
+
+ PrevLabel = FunctionBeginSym;
}
 /// endFunction - Gather and emit post-function debug information.
@@ -2510,10 +2657,15 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
 if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
 if (CurrentFnDbgScope) {
+
 // Define end label for subprogram.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_end",
- Asm->getFunctionNumber()));
+ FunctionEndSym = Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber());
+ // Assumes we are in the correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(FunctionEndSym);
+ collectVariableInfo(MF);
+
 // Get function line info.
if (!Lines.empty()) { // Get section line info. @@ -2543,10 +2695,15 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; + InsnNeedsLabel.clear(); + ProcessedArgs.clear(); + DbgVariableToFrameIndexMap.clear(); + VarToAbstractVarMap.clear(); + DbgVariableToDbgInstMap.clear(); + DbgVariableLabelsMap.clear(); DeleteContainerSeconds(DbgScopeMap); InsnsBeginScopeSet.clear(); InsnsEndScopeSet.clear(); - DbgValueStartMap.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); @@ -2557,30 +2714,82 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { PrevLabel = NULL; } +/// recordVariableFrameIndex - Record a variable's index. +void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) { + assert (V && "Invalid DbgVariable!"); + DbgVariableToFrameIndexMap[V] = Index; +} + +/// findVariableFrameIndex - Return true if frame index for the variable +/// is found. Update FI to hold value of the index. +bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) { + assert (V && "Invalid DbgVariable!"); + DenseMap<const DbgVariable *, int>::iterator I = + DbgVariableToFrameIndexMap.find(V); + if (I == DbgVariableToFrameIndexMap.end()) + return false; + *FI = I->second; + return true; +} + +/// findVariableLabel - Find MCSymbol for the variable. +const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) { + DenseMap<const DbgVariable *, const MCSymbol *>::iterator I + = DbgVariableLabelsMap.find(V); + if (I == DbgVariableLabelsMap.end()) + return NULL; + else return I->second; +} + +/// findDbgScope - Find DbgScope for the debug loc attached with an +/// instruction. +DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { + DbgScope *Scope = NULL; + LLVMContext &Ctx = + MInsn->getParent()->getParent()->getFunction()->getContext(); + DebugLoc DL = MInsn->getDebugLoc(); + + if (DL.isUnknown()) + return Scope; + + if (const MDNode *IA = DL.getInlinedAt(Ctx)) + Scope = ConcreteScopes.lookup(IA); + if (Scope == 0) + Scope = DbgScopeMap.lookup(DL.getScope(Ctx)); + + return Scope; +} + + /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. 
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { +MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) { StringRef Dir; StringRef Fn; - DIDescriptor Scope(S); - if (Scope.isCompileUnit()) { - DICompileUnit CU(S); - Dir = CU.getDirectory(); - Fn = CU.getFilename(); - } else if (Scope.isSubprogram()) { - DISubprogram SP(S); - Dir = SP.getDirectory(); - Fn = SP.getFilename(); - } else if (Scope.isLexicalBlock()) { - DILexicalBlock DB(S); - Dir = DB.getDirectory(); - Fn = DB.getFilename(); - } else - assert(0 && "Unexpected scope info"); + unsigned Src = 1; + if (S) { + DIDescriptor Scope(S); + + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Dir = CU.getDirectory(); + Fn = CU.getFilename(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Dir = SP.getDirectory(); + Fn = SP.getFilename(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Dir = DB.getDirectory(); + Fn = DB.getFilename(); + } else + assert(0 && "Unexpected scope info"); + + Src = GetOrCreateSourceID(Dir, Fn); + } - unsigned Src = GetOrCreateSourceID(Dir, Fn); MCSymbol *Label = MMI->getContext().CreateTempSymbol(); Lines.push_back(SrcLineInfo(Line, Col, Src, Label)); @@ -2643,14 +2852,18 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// void DwarfDebug::computeSizeAndOffsets() { - // Compute size of compile unit header. - static unsigned Offset = - sizeof(int32_t) + // Length of Compilation Unit Info - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) - - computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true); + unsigned PrevOffset = 0; + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + // Compute size of compile unit header. + static unsigned Offset = PrevOffset + + sizeof(int32_t) + // Length of Compilation Unit Info + sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + computeSizeAndOffset(I->second->getCUDie(), Offset, true); + PrevOffset = Offset; + } } /// EmitSectionSym - Switch to the specified MCSection and emit an assembler @@ -2694,6 +2907,9 @@ void DwarfDebug::EmitSectionLabels() { DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); + DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(), + "section_debug_loc"); + TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); EmitSectionSym(Asm, TLOF.getDataSection()); } @@ -2745,6 +2961,14 @@ void DwarfDebug::emitDIE(DIE *Die) { 4); break; } + case dwarf::DW_AT_location: { + if (UseDotDebugLocEntry.count(Die) != 0) { + DIELabel *L = cast<DIELabel>(Values[i]); + Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); + } else + Values[i]->EmitValue(Asm, Form); + break; + } default: // Emit an attribute using the defined form. Values[i]->EmitValue(Asm, Form); @@ -2771,37 +2995,41 @@ void DwarfDebug::emitDebugInfo() { // Start debug info section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfInfoSection()); - DIE *Die = ModuleCU->getCUDie(); - - // Emit the compile units header. 
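The per-unit header emitted below sizes a compile unit as its DIE tree plus the fixed header fields, excluding the 4-byte length field itself, plus this emitter's four padding bytes for the gdb workaround. A worked sketch of the arithmetic (contentSize is a hypothetical helper):

#include <cstdint>
#include <iostream>

// Length of the unit excluding the 4-byte length field itself.
uint32_t contentSize(uint32_t DieTreeSize) {
  return DieTreeSize
       + sizeof(int16_t)   // DWARF version number
       + sizeof(int32_t)   // offset into the abbreviations section
       + sizeof(int8_t)    // pointer size in bytes
       + sizeof(int32_t);  // extra padding bytes for the gdb workaround
}

int main() {
  std::cout << contentSize(100) << "\n";  // prints 111
}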
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin", - ModuleCU->getID())); - - // Emit size of content not including length itself - unsigned ContentSize = Die->getSize() + - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int32_t); // FIXME - extra pad for gdb bug. - - Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); - Asm->EmitInt32(ContentSize); - Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), - DwarfAbbrevSectionSym); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize()); - - emitDIE(Die); - // FIXME - extra padding for gdb bug. - Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", ModuleCU->getID())); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + DIE *Die = TheCU->getCUDie(); + + // Emit the compile units header. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin", + TheCU->getID())); + + // Emit size of content not including length itself + unsigned ContentSize = Die->getSize() + + sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t) + // Pointer Size (in bytes) + sizeof(int32_t); // FIXME - extra pad for gdb bug. + + Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); + Asm->EmitInt32(ContentSize); + Asm->OutStreamer.AddComment("DWARF version number"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), + DwarfAbbrevSectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getTargetData().getPointerSize()); + + emitDIE(Die); + // FIXME - extra padding for gdb bug. + Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); + } } /// emitAbbreviations - Emit the abbreviation section. @@ -2967,8 +3195,6 @@ void DwarfDebug::emitDebugLines() { MCSymbol *Label = LineInfo.getLabel(); if (!Label->isDefined()) continue; // Not emitted, in dead code. - if (LineInfo.getLine() == 0) continue; - if (Asm->isVerbose()) { std::pair<unsigned, unsigned> SrcID = getSourceDirectoryAndFileIds(LineInfo.getSourceID()); @@ -3128,91 +3354,99 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames() { - // Start the dwarf pubnames section. 
- Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); - - Asm->OutStreamer.AddComment("Length of Public Names Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubnames_end", ModuleCU->getID()), - Asm->GetTempSymbol("pubnames_begin", ModuleCU->getID()), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", - ModuleCU->getID())); - - Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()), - DwarfInfoSectionSym); - - Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()), - Asm->GetTempSymbol("info_begin", ModuleCU->getID()), - 4); - - const StringMap<DIE*> &Globals = ModuleCU->getGlobals(); - for (StringMap<DIE*>::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - - Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); + for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + // Start the dwarf pubnames section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + Asm->OutStreamer.AddComment("Length of Public Names Info"); + Asm->EmitLabelDifference( + Asm->GetTempSymbol("pubnames_end", TheCU->getID()), + Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", + TheCU->getID())); + + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + + Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); + Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), + DwarfInfoSectionSym); + + Asm->OutStreamer.AddComment("Compilation Unit Length"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), + Asm->GetTempSymbol("info_begin", TheCU->getID()), + 4); + + const StringMap<DIE*> &Globals = TheCU->getGlobals(); + for (StringMap<DIE*>::const_iterator + GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + DIE *Entity = GI->second; + + Asm->OutStreamer.AddComment("DIE offset"); + Asm->EmitInt32(Entity->getOffset()); + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + } + + Asm->OutStreamer.AddComment("End Mark"); + Asm->EmitInt32(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", + TheCU->getID())); } - - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", - ModuleCU->getID())); } void DwarfDebug::emitDebugPubTypes() { - // Start the dwarf pubnames section. 
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubTypesSection());
- Asm->OutStreamer.AddComment("Length of Public Types Info");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubtypes_end", ModuleCU->getID()),
- Asm->GetTempSymbol("pubtypes_begin", ModuleCU->getID()), 4);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
- ModuleCU->getID()));
-
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
-
- Asm->OutStreamer.AddComment("Offset of Compilation ModuleCU Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
- DwarfInfoSectionSym);
-
- Asm->OutStreamer.AddComment("Compilation ModuleCU Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()),
- Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
- 4);
-
- const StringMap<DIE*> &Globals = ModuleCU->getGlobalTypes();
- for (StringMap<DIE*>::const_iterator
- GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
- const char *Name = GI->getKeyData();
- DIE * Entity = GI->second;
-
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
- Asm->EmitInt32(Entity->getOffset());
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubtypes section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ Asm->OutStreamer.AddComment("Length of Public Types Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+ TheCU->getID()));
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
- Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE * Entity = GI->second;
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+ TheCU->getID()));
 }
-
- Asm->OutStreamer.AddComment("End Mark");
- Asm->EmitInt32(0);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
- ModuleCU->getID()));
}
 /// emitDebugStr - Emit visible names into a debug str section.
@@ -3248,9 +3482,39 @@ void DwarfDebug::emitDebugStr() {
 /// emitDebugLoc - Emit location lists into the debug loc section.
 ///
 void DwarfDebug::emitDebugLoc() {
+ if (DotDebugLocEntries.empty())
+ return;
+
+ // Start the dwarf loc section.
Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfLocSection()); + Asm->getObjFileLowering().getDwarfLocSection()); + unsigned char Size = Asm->getTargetData().getPointerSize(); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); + unsigned index = 1; + for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(), + E = DotDebugLocEntries.end(); I != E; ++I, ++index) { + DotDebugLocEntry Entry = *I; + if (Entry.isEmpty()) { + Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); + } else { + Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); + Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false); + if (Reg < 32) { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(1); + Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(1+MCAsmInfo::getULEB128Size(Reg)); + Asm->EmitInt8(dwarf::DW_OP_regx); + Asm->EmitULEB128(Reg); + } + } + } } /// EmitDebugARanges - Emit visible names into a debug aranges section. @@ -3310,7 +3574,7 @@ void DwarfDebug::emitDebugInlineInfo() { if (!Asm->MAI->doesDwarfUsesInlineInfoSection()) return; - if (!ModuleCU) + if (!FirstCU) return; Asm->OutStreamer.SwitchSection( @@ -3327,11 +3591,11 @@ void DwarfDebug::emitDebugInlineInfo() { Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getTargetData().getPointerSize()); - for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(), + for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { - MDNode *Node = *I; - DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II + const MDNode *Node = *I; + DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node); SmallVector<InlineInfoLabels, 4> &Labels = II->second; DISubprogram SP(Node); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index b964b23..0d6116f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -15,6 +15,7 @@ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineLocation.h" #include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -30,7 +31,6 @@ class DbgConcreteScope; class DbgScope; class DbgVariable; class MachineFrameInfo; -class MachineLocation; class MachineModuleInfo; class MachineOperand; class MCAsmInfo; @@ -82,8 +82,8 @@ class DwarfDebug { // Attributes used to construct specific Dwarf sections. // - /// ModuleCU - All DIEs are inserted in ModuleCU. - CompileUnit *ModuleCU; + CompileUnit *FirstCU; + DenseMap <const MDNode *, CompileUnit *> CUMap; /// AbbreviationsSet - Used to uniquely define abbreviations. /// @@ -146,15 +146,15 @@ class DwarfDebug { /// DbgScopeMap - Tracks the scopes in the current function. Owns the /// contained DbgScope*s. /// - DenseMap<MDNode *, DbgScope *> DbgScopeMap; + DenseMap<const MDNode *, DbgScope *> DbgScopeMap; /// ConcreteScopes - Tracks the concrete scopees in the current function. /// These scopes are also included in DbgScopeMap. 
- DenseMap<MDNode *, DbgScope *> ConcreteScopes; + DenseMap<const MDNode *, DbgScope *> ConcreteScopes; /// AbstractScopes - Tracks the abstract scopes a module. These scopes are /// not included DbgScopeMap. AbstractScopes owns its DbgScope*s. - DenseMap<MDNode *, DbgScope *> AbstractScopes; + DenseMap<const MDNode *, DbgScope *> AbstractScopes; /// AbstractScopesList - Tracks abstract scopes constructed while processing /// a function. This list is cleared during endFunction(). @@ -162,13 +162,43 @@ class DwarfDebug { /// AbstractVariables - Collection on abstract variables. Owned by the /// DbgScopes in AbstractScopes. - DenseMap<MDNode *, DbgVariable *> AbstractVariables; - - /// DbgValueStartMap - Tracks starting scope of variable DIEs. - /// If the scope of an object begins sometime after the low pc value for the - /// scope most closely enclosing the object, the object entry may have a - /// DW_AT_start_scope attribute. - DenseMap<const MachineInstr *, DbgVariable *> DbgValueStartMap; + DenseMap<const MDNode *, DbgVariable *> AbstractVariables; + + /// DbgVariableToFrameIndexMap - Tracks frame index used to find + /// variable's value. + DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap; + + /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE + /// machine instruction. + DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap; + + /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol. + DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap; + + /// DotDebugLocEntry - This struct describes location entries emitted in + /// .debug_loc section. + typedef struct DotDebugLocEntry { + const MCSymbol *Begin; + const MCSymbol *End; + MachineLocation Loc; + DotDebugLocEntry() : Begin(0), End(0) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, + MachineLocation &L) : Begin(B), End(E), Loc(L) {} + /// Empty entries are also used as a trigger to emit a temp label. Such + /// labels are used to find the debug_loc offset for a given DIE. + bool isEmpty() { return Begin == 0 && End == 0; } + } DotDebugLocEntry; + + /// DotDebugLocEntries - Collection of DotDebugLocEntry. + SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; + + /// UseDotDebugLocEntry - DW_AT_location attributes for the DIEs in this set + /// identify the corresponding .debug_loc entry offset. + SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry; + + /// VarToAbstractVarMap - Maps DbgVariable to corresponding Abstract + /// DbgVariable, if any. + DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap; /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked /// (at the end of the module) as DW_AT_inline. @@ -177,7 +207,7 @@ class DwarfDebug { /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that /// need DW_AT_containing_type attribute. This attribute points to a DIE that /// corresponds to the MDNode mapped with the subprogram DIE. - DenseMap<DIE *, MDNode *> ContainingTypeMap; + DenseMap<DIE *, const MDNode *> ContainingTypeMap; typedef SmallVector<DbgScope *, 2> ScopeVector; SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet; @@ -185,9 +215,9 @@ class DwarfDebug { /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section.
- typedef std::pair<MCSymbol*, DIE *> InlineInfoLabels; - DenseMap<MDNode*, SmallVector<InlineInfoLabels, 4> > InlineInfo; - SmallVector<MDNode *, 4> InlinedSPNodes; + typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; + DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; + SmallVector<const MDNode *, 4> InlinedSPNodes; /// LabelsBeforeInsn - Maps instruction with label emitted before /// instruction. @@ -197,6 +227,13 @@ class DwarfDebug { /// instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; + /// InsnNeedsLabel - Collection of instructions that need a label to mark + /// a debugging information entity. + SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel; + + /// ProcessedArgs - Collection of arguments already processed. + SmallPtrSet<const MDNode *, 8> ProcessedArgs; + SmallVector<const MCSymbol *, 8> DebugRangeSymbols; /// Previous instruction's location information. This is used to determine @@ -219,8 +256,8 @@ class DwarfDebug { // section offsets and are created by EmitSectionLabels. MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - - MCSymbol *FunctionBeginSym; + MCSymbol *DwarfDebugLocSectionSym; + MCSymbol *FunctionBeginSym, *FunctionEndSym; private: /// getSourceDirectoryAndFileIds - Return the directory and file ids that @@ -295,7 +332,7 @@ private: /// addSourceLine - Add location information to specified debug information /// entry. void addSourceLine(DIE *Die, const DIVariable *V); - void addSourceLine(DIE *Die, const DIGlobal *G); + void addSourceLine(DIE *Die, const DIGlobalVariable *G); void addSourceLine(DIE *Die, const DISubprogram *SP); void addSourceLine(DIE *Die, const DIType *Ty); void addSourceLine(DIE *Die, const DINameSpace *NS); @@ -306,13 +343,13 @@ private: const MachineLocation &Location); /// addRegisterAddress - Add register location entry in variable DIE. - bool addRegisterAddress(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable @@ -380,21 +417,18 @@ private: DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false); /// getOrCreateDbgScope - Create DbgScope for the scope. - DbgScope *getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt); + DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt); - DbgScope *getOrCreateAbstractScope(MDNode *N); + DbgScope *getOrCreateAbstractScope(const MDNode *N); /// findAbstractVariable - Find abstract variable associated with Var.
- DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx, - DebugLoc Loc); - DbgVariable *findAbstractVariable(DIVariable &Var, const MachineInstr *MI, - DebugLoc Loc); + DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert /// DIEs for these variables. - DIE *updateSubprogramScopeDIE(MDNode *SPNode); + DIE *updateSubprogramScopeDIE(const MDNode *SPNode); /// constructLexicalScope - Construct new DW_TAG_lexical_block /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. @@ -506,11 +540,18 @@ private: /// maps as well. unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName); - void constructCompileUnit(MDNode *N); + /// constructCompileUnit - Create new CompileUnit for the given + /// metadata node with tag DW_TAG_compile_unit. + void constructCompileUnit(const MDNode *N); + + /// getCompileUnit - Get CompileUnit DIE. + CompileUnit *getCompileUnit(const MDNode *N) const; - void constructGlobalVariableDIE(MDNode *N); + /// constructGlobalVariableDIE - Construct global variable DIE. + void constructGlobalVariableDIE(const MDNode *N); - void constructSubprogramDIE(MDNode *N); + /// constructSubprogramDIE - Construct subprogram DIE. + void constructSubprogramDIE(const MDNode *N); // FIXME: This should go away in favor of complex addresses. /// Find the type the programmer originally declared the variable to be /// and return that type. @@ -521,7 +562,7 @@ private: /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. - MCSymbol *recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); + MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); /// getSourceLineCount - Return the number of source lines in the debug /// info. @@ -529,6 +570,20 @@ private: return Lines.size(); } + /// recordVariableFrameIndex - Record a variable's frame index. + void recordVariableFrameIndex(const DbgVariable *V, int Index); + + /// findVariableFrameIndex - Return true if frame index for the variable + /// is found. Update FI to hold the value of the index. + bool findVariableFrameIndex(const DbgVariable *V, int *FI); + + /// findVariableLabel - Find MCSymbol for the variable. + const MCSymbol *findVariableLabel(const DbgVariable *V); + + /// findDbgScope - Find DbgScope for the debug loc attached to an + /// instruction. + DbgScope *findDbgScope(const MachineInstr *MI); + /// identifyScopeMarkers() - Indentify instructions that are marking /// beginning of or end of a scope. void identifyScopeMarkers(); @@ -538,8 +593,12 @@ private: bool extractScopeInformation(); /// collectVariableInfo - Populate DbgScope entries with variables' info. - void collectVariableInfo(); + void collectVariableInfo(const MachineFunction *); + /// collectVariableInfoFromMMITable - Collect variable information from + /// the side table maintained by MMI. + void collectVariableInfoFromMMITable(const MachineFunction * MF, + SmallPtrSet<const MDNode *, 16> &P); public: //===--------------------------------------------------------------------===// // Main entry points. @@ -563,6 +622,12 @@ public: /// void endFunction(const MachineFunction *MF); + /// getLabelBeforeInsn - Return Label preceding the instruction.
+ const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + + /// getLabelAfterInsn - Return Label immediately following the instruction. + const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + /// beginScope - Process beginning of a scope. void beginScope(const MachineInstr *MI); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 0ff1036..c872840 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -189,7 +189,7 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // EH Frame, but some environments do not handle weak absolute symbols. If // UnwindTablesMandatory is set we cannot do this optimization; the unwind // info is to be available for non-EH uses. - if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory && + if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory && (!TheFunc->isWeakForLinker() || !Asm->MAI->getWeakDefDirective() || TLOF.getSupportsWeakOmittedEHFrame())) { @@ -949,11 +949,12 @@ void DwarfException::EndFunction() { TLOF.isFunctionEHFrameSymbolPrivate()); // Save EH frame information - EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym, - Asm->getFunctionNumber(), - MMI->getPersonalityIndex(), - Asm->MF->getFrameInfo()->hasCalls(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - Asm->MF->getFunction())); + EHFrames. + push_back(FunctionEHFrameInfo(FunctionEHSym, + Asm->getFunctionNumber(), + MMI->getPersonalityIndex(), + Asm->MF->getFrameInfo()->adjustsStack(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + Asm->MF->getFunction())); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 5839f8c..bc311e6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -45,7 +45,7 @@ class DwarfException { MCSymbol *FunctionEHSym; // L_foo.eh unsigned Number; unsigned PersonalityIndex; - bool hasCalls; + bool adjustsStack; bool hasLandingPads; std::vector<MachineMove> Moves; const Function *function; @@ -55,7 +55,7 @@ class DwarfException { const std::vector<MachineMove> &M, const Function *f): FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P), - hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { } + adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { } }; std::vector<FunctionEHFrameInfo> EHFrames; diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index a8c3c7b..f92127f 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -104,6 +104,21 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(getModule(), AP, "frametable"); + int NumDescriptors = 0; + for (iterator I = begin(), IE = end(); I != IE; ++I) { + GCFunctionInfo &FI = **I; + for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { + NumDescriptors++; + } + } + + if (NumDescriptors >= 1<<16) { + // Very rude! + report_fatal_error(" Too much descriptor for ocaml GC"); + } + AP.EmitInt16(NumDescriptors); + AP.EmitAlignment(IntPtrSize == 4 ? 
2 : 3); + for (iterator I = begin(), IE = end(); I != IE; ++I) { GCFunctionInfo &FI = **I; @@ -135,11 +150,13 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { for (GCFunctionInfo::live_iterator K = FI.live_begin(J), KE = FI.live_end(J); K != KE; ++K) { - assert(K->StackOffset < 1<<16 && - "GC root stack offset is outside of fixed stack frame and out " - "of range for ocaml GC!"); - - AP.EmitInt32(K->StackOffset); + if (K->StackOffset >= 1<<16) { + // Very rude! + report_fatal_error( + "GC root stack offset is outside of fixed stack frame and out " + "of range for ocaml GC!"); + } + AP.EmitInt16(K->StackOffset); } AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 759fbaa..fd957b1 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -26,7 +26,7 @@ using namespace llvm; CriticalAntiDepBreaker:: -CriticalAntiDepBreaker(MachineFunction& MFi) : +CriticalAntiDepBreaker(MachineFunction& MFi) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TRI(MF.getTarget().getRegisterInfo()), @@ -172,7 +172,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { unsigned Reg = MO.getReg(); if (Reg == 0) continue; const TargetRegisterClass *NewRC = 0; - + if (i < MI->getDesc().getNumOperands()) NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); @@ -422,7 +422,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // breaking anti-dependence edges that aren't going to significantly // impact the overall schedule. There are a limited number of registers // and we want to save them for the important edges. - // + // // TODO: Instructions with multiple defs could have multiple // anti-dependencies. The current code here only knows how to break one // edge per instruction. 
Note that we'd have to be able to break all of diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index e1c52f7..63bb5f2 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -83,6 +83,12 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, return NewCI; } +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) +#define setjmp_undefined_for_visual_studio +#undef setjmp +#endif + void IntrinsicLowering::AddPrototypes(Module &M) { LLVMContext &Context = M.getContext(); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 331dc7d..b584704 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -65,6 +65,12 @@ static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, cl::desc("Print LLVM IR input to isel pass")); static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); +static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden, + cl::desc("Show encoding in .s output")); +static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden, + cl::desc("Show instruction structure in .s output")); +static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden, + cl::desc("Enable MC API logging")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); @@ -131,21 +137,33 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); + + // Create a code emitter if asked to show the encoding. + // + // FIXME: These are currently leaked. + MCCodeEmitter *MCE = 0; + if (ShowMCEncoding) + MCE = getTarget().createCodeEmitter(*this, *Context); + AsmStreamer.reset(createAsmStreamer(*Context, Out, getTargetData()->isLittleEndian(), getVerboseAsm(), InstPrinter, - /*codeemitter*/0)); + MCE, ShowMCInst)); break; } case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. + // + // FIXME: These are currently leaked. MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); if (MCE == 0 || TAB == 0) return true; - - AsmStreamer.reset(createMachOStreamer(*Context, *TAB, Out, MCE)); + + AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context, + *TAB, Out, MCE, + hasMCRelaxAll())); break; } case CGFT_Null: @@ -154,7 +172,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, AsmStreamer.reset(createNullStreamer(*Context)); break; } - + + if (EnableMCLogging) + AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
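Among the changes above, -enable-mc-api-logging wraps whatever streamer was just built via AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())), an ownership hand-off into a plain decorator. A minimal sketch of that take-then-reset pattern with a made-up Streamer interface, not the real MCStreamer API:

    #include <iostream>
    #include <memory>
    #include <string>

    struct Streamer {
      virtual ~Streamer() {}
      virtual void emit(const std::string &S) = 0;
    };

    struct RawStreamer : Streamer {
      void emit(const std::string &S) override { std::cout << S << "\n"; }
    };

    // Logs every call, then forwards to the streamer it now owns.
    struct LoggingStreamer : Streamer {
      explicit LoggingStreamer(Streamer *S) : Inner(S) {}
      void emit(const std::string &S) override {
        std::cerr << "emit(\"" << S << "\")\n"; // the log
        Inner->emit(S);                         // the real work
      }
      std::unique_ptr<Streamer> Inner;
    };

    int main() {
      std::unique_ptr<Streamer> AsmStreamer(new RawStreamer);
      bool EnableMCLogging = true;               // stands in for the new flag
      if (EnableMCLogging) {
        Streamer *Taken = AsmStreamer.release(); // OwningPtr::take()
        AsmStreamer.reset(new LoggingStreamer(Taken));
      }
      AsmStreamer->emit(".text");                // logged, then emitted
    }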
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); if (Printer == 0) diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index f1bd573..03b4eab 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -68,7 +68,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { return OnlyAvailablePred; } -void LatencyPriorityQueue::push_impl(SUnit *SU) { +void LatencyPriorityQueue::push(SUnit *SU) { // Look at all of the successors of this node. Count the number of nodes that // this node is the sole unscheduled node for. unsigned NumNodesBlocking = 0; @@ -79,7 +79,7 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { } NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; - Queue.push(SU); + Queue.push_back(SU); } @@ -114,3 +114,25 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { // NumNodesSolelyBlocking value. push(OnlyAvailablePred); } + +SUnit *LatencyPriorityQueue::pop() { + if (empty()) return NULL; + std::vector<SUnit *>::iterator Best = Queue.begin(); + for (std::vector<SUnit *>::iterator I = next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + Queue.pop_back(); + return V; +} + +void LatencyPriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); +} diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index ca9921c..a6d38ad 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -263,7 +263,7 @@ static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) { #endif static -bool MultipleDefsByMI(const MachineInstr &MI, unsigned MOIdx) { +bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { unsigned Reg = MI.getOperand(MOIdx).getReg(); for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) { const MachineOperand &MO = MI.getOperand(i); @@ -279,6 +279,24 @@ bool MultipleDefsByMI(const MachineInstr &MI, unsigned MOIdx) { return false; } +/// isPartialRedef - Return true if the specified def at the specific index is +/// partially re-defining the specified live interval. A common case of this is +/// a definition of the sub-register. +bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, + LiveInterval &interval) { + if (!MO.getSubReg() || MO.isEarlyClobber()) + return false; + + SlotIndex RedefIndex = MIIdx.getDefIndex(); + const LiveRange *OldLR = + interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + if (OldLR->valno->isDefAccurate()) { + MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); + return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; + } + return false; +} + void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineBasicBlock::iterator mi, SlotIndex MIIdx, @@ -302,15 +320,20 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // of inputs. if (MO.isEarlyClobber()) defIndex = MIIdx.getUseIndex(); - VNInfo *ValNo; + + // Make sure the first definition is not a partial redefinition. Add an + // <imp-def> of the full register. 
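The LatencyPriorityQueue hunks above drop the std::priority_queue (push_impl/Queue.push) for a plain std::vector: push appends, pop does a linear scan with the picker predicate followed by a swap-with-back and pop_back, and remove becomes possible at all, which a binary heap could not offer cheaply. A standalone sketch of the pattern under assumed toy types:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Unit { unsigned Id; unsigned Latency; };

    // Returns true when A should be scheduled after B, i.e. B is the better
    // pick, matching the sense of Picker(*Best, *I) in the hunk above.
    struct Picker {
      bool operator()(const Unit *A, const Unit *B) const {
        return A->Latency < B->Latency;
      }
    };

    struct LatencyQueue {
      std::vector<Unit *> Queue;
      Picker Pick;

      void push(Unit *U) { Queue.push_back(U); }

      Unit *pop() {
        if (Queue.empty()) return 0;
        std::vector<Unit *>::iterator Best = Queue.begin();
        for (std::vector<Unit *>::iterator I = Best + 1, E = Queue.end();
             I != E; ++I)
          if (Pick(*Best, *I)) // *I beats the current best
            Best = I;
        Unit *V = *Best;
        if (Best != Queue.end() - 1)
          std::swap(*Best, Queue.back()); // O(1) erase: fill the hole from the back
        Queue.pop_back();
        return V;
      }

      void remove(Unit *U) {
        assert(!Queue.empty() && "Queue is empty!");
        std::vector<Unit *>::iterator I =
            std::find(Queue.begin(), Queue.end(), U);
        assert(I != Queue.end() && "Unit not in queue!");
        if (I != Queue.end() - 1)
          std::swap(*I, Queue.back());
        Queue.pop_back();
      }
    };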
+ if (MO.getSubReg()) + mi->addRegisterDefined(interval.reg); + MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; - // Earlyclobbers move back one. - ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true, + VNInfoAllocator); assert(ValNo->id == 0 && "First value in interval is not 0?"); // Loop over all of the blocks that the vreg is defined in. There are @@ -389,9 +412,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, } } else { - if (MultipleDefsByMI(*mi, MOIdx)) - // Mutple defs of the same virtual register by the same instruction. e.g. - // %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ... + if (MultipleDefsBySameMI(*mi, MOIdx)) + // Multiple defs of the same virtual register by the same instruction. + // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ... // This is likely due to elimination of REG_SEQUENCE instructions. Return // here since there is nothing to do. return; @@ -400,13 +423,21 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // must be due to phi elimination or two addr elimination. If this is // the result of two address elimination, then the vreg is one of the // def-and-use register operand. - if (mi->isRegTiedToUseOperand(MOIdx)) { + + // It may also be partial redef like this: + // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0 + // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0 + bool PartReDef = isPartialRedef(MIIdx, MO, interval); + if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) { // If this is a two-address definition, then we have already processed // the live range. The only problem is that we didn't realize there // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - assert(interval.containsOneValue()); + // Two-address vregs should always only be redefined once. This means + // that at this point, there should be exactly one value number in it. + assert((PartReDef || interval.containsOneValue()) && + "Unexpected 2-addr liveint!"); SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex(); SlotIndex RedefIndex = MIIdx.getDefIndex(); if (MO.isEarlyClobber()) @@ -420,10 +451,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // because the 2-addr copy must be in the same MBB as the redef. interval.removeRange(DefIndex, RedefIndex); - // Two-address vregs should always only be redefined once. This means - // that at this point, there should be exactly one value number in it. - assert(interval.containsOneValue() && "Unexpected 2-addr liveint!"); - // The new value number (#1) is defined by the instruction we claimed // defined value #0. VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(), @@ -434,6 +461,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; OldValNo->setCopy(0); + + // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ... 
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (PartReDef && + tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + OldValNo->setCopy(&*mi); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -451,8 +484,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, dbgs() << " RESULT: "; interval.print(dbgs(), tri_); }); - } else { - assert(lv_->isPHIJoin(interval.reg) && "Multiply defined register"); + } else if (lv_->isPHIJoin(interval.reg)) { // In the case of PHI elimination, each variable definition is only // live until the end of the block. We've already taken care of the // rest of the live range. @@ -475,6 +507,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); + } else { + llvm_unreachable("Multiply defined register"); } } @@ -528,7 +562,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, end = baseIndex.getDefIndex(); goto exit; } else { - int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_); + int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_); if (DefIdx != -1) { if (mi->isRegTiedToUseOperand(DefIdx)) { // Two-address instruction. @@ -590,7 +624,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) // If MI also modifies the sub-register explicitly, avoid processing it // more than once. Do not pass in TRI here so it checks for exact match. - if (!MI->modifiesRegister(*AS)) + if (!MI->definesRegister(*AS)) handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(*AS), 0); } @@ -631,7 +665,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, end = baseIndex.getDefIndex(); SeenDefUse = true; break; - } else if (mi->modifiesRegister(interval.reg, tri_)) { + } else if (mi->definesRegister(interval.reg, tri_)) { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: @@ -1343,7 +1377,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MI->eraseFromParent(); continue; } - assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); + assert(!(O.isImplicit() && O.isUse()) && + "Spilling register that's used as implicit use?"); SlotIndex index = getInstructionIndex(MI); if (index < start || index >= end) continue; @@ -1605,7 +1640,7 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // overflow a float. This expression behaves like 10^d for small d, but is // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of // headroom before overflow. 
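The comment above motivates the line it precedes: pow(1 + 100.0f/(d+10), d) behaves like 10^d for small loop depth d but is tempered so that even d = 200 yields about 6.7e33, still inside float range. A small standalone check of those magnitudes, using the std::pow spelling the patch switches to (printed values are illustrative, not taken from a real compile):

    #include <cmath>
    #include <cstdio>

    // Loop-depth scale factor used for spill weights: ~10^d for small d,
    // tempered for large d so it cannot overflow a float.
    static float depthScale(unsigned Depth) {
      return std::pow(1 + (100.0f / (Depth + 10)), (float)Depth);
    }

    int main() {
      const unsigned Depths[] = {0, 1, 2, 5, 200};
      for (unsigned i = 0; i != sizeof(Depths) / sizeof(Depths[0]); ++i)
        std::printf("depth %3u -> %g\n", Depths[i], depthScale(Depths[i]));
      // depth 0 -> 1, depth 1 -> ~10.1, depth 2 -> ~87.1, depth 200 -> ~6.7e33
    }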
- float lc = powf(1 + (100.0f / (loopDepth+10)), (float)loopDepth); + float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth); return (isDef + isUse) * lc; } diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index b4ef648..b0348a5 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -140,7 +140,8 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { // Insert copy const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg); const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg); - bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS); + bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS, + MI->getDebugLoc()); (void)Emitted; assert(Emitted && "Subreg and Dst must be of compatible register class"); // Transfer the kill/dead flags, if needed. @@ -193,7 +194,8 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg); const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg); - bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1, + MI->getDebugLoc()); (void)Emitted; assert(Emitted && "Subreg and Dst must be of compatible register class"); // Transfer the kill/dead flags, if needed. @@ -262,7 +264,8 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::KILL), DstSubReg); else { - bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1, + MI->getDebugLoc()); (void)Emitted; assert(Emitted && "Subreg and Dst must be of compatible register class"); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 84c3d71..6f4f7a8 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -30,6 +31,9 @@ using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); +static cl::opt<bool> CSEPhysDef("machine-cse-phys-defs", + cl::init(false), cl::Hidden); + namespace { class MachineCSE : public MachineFunctionPass { const TargetInstrInfo *TII; @@ -39,7 +43,7 @@ namespace { MachineRegisterInfo *MRI; public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {} + MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -52,6 +56,7 @@ namespace { } private: + const unsigned LookAheadLimit; typedef ScopedHashTableScope<MachineInstr*, unsigned, MachineInstrExpressionTrait> ScopeType; DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; @@ -62,8 +67,12 @@ namespace { bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E); - bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB); + MachineBasicBlock::const_iterator E) const ; + bool hasLivePhysRegDefUse(const MachineInstr *MI, + const 
MachineBasicBlock *MBB, + unsigned &PhysDef) const; + bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, + unsigned PhysDef) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); @@ -112,6 +121,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); MO.setReg(SrcReg); + MRI->clearKillFlags(SrcReg); if (NewRC != SRC) MRI->setRegClass(SrcReg, NewRC); DefMI->eraseFromParent(); @@ -123,10 +133,11 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, return Changed; } -bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg, - MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E) { - unsigned LookAheadLeft = 5; +bool +MachineCSE::isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const { + unsigned LookAheadLeft = LookAheadLimit; while (LookAheadLeft) { // Skip over dbg_value's. while (I != E && I->isDebugValue()) @@ -144,6 +155,7 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg, if (!TRI->regsOverlap(MO.getReg(), Reg)) continue; if (MO.isUse()) + // Found a use! return false; SeenDef = true; } @@ -159,41 +171,73 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg, } /// hasLivePhysRegDefUse - Return true if the specified instruction read / write -/// physical registers (except for dead defs of physical registers). -bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){ - unsigned PhysDef = 0; +/// physical registers (except for dead defs of physical registers). It also +/// returns the physical register def by reference if it's the only one. +bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, + const MachineBasicBlock *MBB, + unsigned &PhysDef) const { + PhysDef = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (MO.isUse()) - // Can't touch anything to read a physical register. - return true; - if (MO.isDead()) - // If the def is dead, it's ok. - continue; - // Ok, this is a physical register def that's not marked "dead". That's - // common since this pass is run before livevariables. We can scan - // forward a few instructions and check if it is obviously dead. - if (PhysDef) - // Multiple physical register defs. These are rare, forget about it. - return true; - PhysDef = Reg; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isUse()) + // Can't touch anything to read a physical register. + return true; + if (MO.isDead()) + // If the def is dead, it's ok. + continue; + // Ok, this is a physical register def that's not marked "dead". That's + // common since this pass is run before livevariables. We can scan + // forward a few instructions and check if it is obviously dead. + if (PhysDef) { + // Multiple physical register defs. These are rare, forget about it. 
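isPhysDefTriviallyDead above and PhysRegDefReaches just below share one idiom: walk forward over at most LookAheadLimit (5) non-debug instructions and return the conservative answer when the budget runs out. A standalone sketch of that bounded scan over a toy instruction list (the types are stand-ins, not the MachineInstr API):

    #include <vector>

    struct Instr {
      bool IsDebugValue;
      bool ReadsReg;   // reads the physical register in question
      bool WritesReg;  // clobbers it
    };

    // Scan [I, E) for a read or clobber of the register, giving up after
    // Limit non-debug instructions. Returns true only if the def is proven
    // dead (clobbered or block ends before any read); an exhausted budget
    // yields false, the safe answer.
    static bool physDefTriviallyDead(std::vector<Instr>::const_iterator I,
                                     std::vector<Instr>::const_iterator E,
                                     unsigned Limit = 5) {
      unsigned LookAheadLeft = Limit;
      while (LookAheadLeft) {
        while (I != E && I->IsDebugValue) // dbg_value does not consume budget
          ++I;
        if (I == E)
          return true;  // reached block end without a use
        if (I->ReadsReg)
          return false; // found a use
        if (I->WritesReg)
          return true;  // clobbered before any read
        --LookAheadLeft;
        ++I;
      }
      return false;     // budget exhausted: assume live
    }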
+ PhysDef = 0; + return true; } + PhysDef = Reg; } if (PhysDef) { - MachineBasicBlock::iterator I = MI; I = llvm::next(I); + MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) return true; } return false; } +bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, + unsigned PhysDef) const { + // For now conservatively returns false if the common subexpression is + // not in the same basic block as the given instruction. + MachineBasicBlock *MBB = MI->getParent(); + if (CSMI->getParent() != MBB) + return false; + MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); + MachineBasicBlock::const_iterator E = MI; + unsigned LookAheadLeft = LookAheadLimit; + while (LookAheadLeft) { + // Skip over dbg_value's. + while (I != E && I->isDebugValue()) + ++I; + + if (I == E) + return true; + if (I->modifiesRegister(PhysDef, TRI)) + return false; + + --LookAheadLeft; + ++I; + } + + return false; +} + static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) { unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) || @@ -326,9 +370,20 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // If the instruction defines a physical register and the value *may* be // used, then it's not safe to replace it with a common subexpression. - if (FoundCSE && hasLivePhysRegDefUse(MI, MBB)) + unsigned PhysDef = 0; + if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { FoundCSE = false; + // ... Unless the CS is local and it also defines the physical register + // which is not clobbered in between. + if (PhysDef && CSEPhysDef) { + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefReaches(CSMI, MI, PhysDef)) + FoundCSE = true; + } + } + if (!FoundCSE) { VNT.insert(MI, CurrVN++); Exps.push_back(MI); @@ -365,8 +420,10 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Actually perform the elimination. 
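In the elimination code below, every replaceRegWith is now paired with clearKillFlags on the surviving register: folding two defs into one extends that register's live range, so a kill flag on what used to be its last use may now sit in the middle of the range. A sketch of the flag-clearing side under assumed toy operand types:

    #include <vector>

    struct Operand {
      unsigned Reg;
      bool IsUse;
      bool IsKill; // "last use" hint; only ever a conservative approximation
    };

    // After an optimization extends Reg's live range (CSE, LICM, coalescing),
    // surviving kill flags on Reg may be stale. Clearing them is always safe:
    // kill flags are an optimization hint, not required for correctness.
    static void clearKillFlags(std::vector<Operand *> &UsesOfReg) {
      for (unsigned i = 0, e = UsesOfReg.size(); i != e; ++i)
        if (UsesOfReg[i]->IsUse)
          UsesOfReg[i]->IsKill = false;
    }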
if (DoCSE) { - for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) + for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); + MRI->clearKillFlags(CSEPairs[i].second); + } MI->eraseFromParent(); ++NumCSEs; } else { diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 3cf10b3..a38c881 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -398,8 +398,14 @@ void MachineFunction::viewCFGOnly() const unsigned MachineFunction::addLiveIn(unsigned PReg, const TargetRegisterClass *RC) { assert(RC->contains(PReg) && "Not the correct regclass!"); - unsigned VReg = getRegInfo().createVirtualRegister(RC); - getRegInfo().addLiveIn(PReg, VReg); + MachineRegisterInfo &MRI = getRegInfo(); + unsigned VReg = MRI.getLiveInVirtReg(PReg); + if (VReg) { + assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!"); + return VReg; + } + VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(PReg, VReg); return VReg; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 99b5beb..e54cd5c 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -219,8 +219,12 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "%physreg" << getReg(); } - if (getSubReg() != 0) - OS << ':' << getSubReg(); + if (getSubReg() != 0) { + if (TM) + OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg()); + else + OS << ':' << getSubReg(); + } if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || isEarlyClobber()) { @@ -781,25 +785,57 @@ int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill, } return -1; } - + +/// readsWritesVirtualRegister - Return a pair of bools (reads, writes) +/// indicating if this instruction reads or writes Reg. This also considers +/// partial defines. +std::pair<bool,bool> +MachineInstr::readsWritesVirtualRegister(unsigned Reg, + SmallVectorImpl<unsigned> *Ops) const { + bool PartDef = false; // Partial redefine. + bool FullDef = false; // Full define. + bool Use = false; + + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != Reg) + continue; + if (Ops) + Ops->push_back(i); + if (MO.isUse()) + Use |= !MO.isUndef(); + else if (MO.getSubReg()) + PartDef = true; + else + FullDef = true; + } + // A partial redefine uses Reg unless there is also a full define. + return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef); +} + /// findRegisterDefOperandIdx() - Returns the operand index that is a def of /// the specified register or -1 if it is not found. If isDead is true, defs /// that are not dead are skipped. If TargetRegisterInfo is non-null, then it /// also checks if there is a def of a super-register. 
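The readsWritesVirtualRegister helper added above reduces partial redefinitions to one rule: a subregister def always writes the register, and it also reads it (the untouched lanes) unless some other operand fully defines it. The same decision table restated standalone, with a made-up operand encoding:

    #include <utility>
    #include <vector>

    struct RegOperand {
      bool IsUse;      // use operand (vs. def)
      bool IsUndef;    // use of an undefined value
      unsigned SubReg; // 0 = full register, nonzero = partial def/use
    };

    // Returns (reads, writes) for one virtual register across an
    // instruction's operands, mirroring readsWritesVirtualRegister.
    static std::pair<bool, bool>
    readsWrites(const std::vector<RegOperand> &Ops) {
      bool PartDef = false, FullDef = false, Use = false;
      for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
        const RegOperand &MO = Ops[i];
        if (MO.IsUse)
          Use |= !MO.IsUndef;  // undef uses read nothing
        else if (MO.SubReg)
          PartDef = true;      // partial redefinition
        else
          FullDef = true;      // full definition
      }
      // A partial redef implicitly reads the old value of the other lanes,
      // unless the instruction also fully defines the register.
      return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
    }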
-int MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, - const TargetRegisterInfo *TRI) const { +int +MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, + const TargetRegisterInfo *TRI) const { + bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned MOReg = MO.getReg(); - if (MOReg == Reg || - (TRI && - TargetRegisterInfo::isPhysicalRegister(MOReg) && - TargetRegisterInfo::isPhysicalRegister(Reg) && - TRI->isSubRegister(MOReg, Reg))) - if (!isDead || MO.isDead()) - return i; + bool Found = (MOReg == Reg); + if (!Found && TRI && isPhys && + TargetRegisterInfo::isPhysicalRegister(MOReg)) { + if (Overlap) + Found = TRI->regsOverlap(MOReg, Reg); + else + Found = TRI->isSubRegister(MOReg, Reg); + } + if (Found && (!isDead || MO.isDead())) + return i; } return -1; } @@ -938,6 +974,16 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { return true; } +/// clearKillInfo - Clears kill flags on all operands. +/// +void MachineInstr::clearKillInfo() { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.isUse()) + MO.setIsKill(false); + } +} + /// copyKillDeadInfo - Copies kill / dead operand properties from MI. /// void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { @@ -1355,11 +1401,21 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, void MachineInstr::addRegisterDefined(unsigned IncomingReg, const TargetRegisterInfo *RegInfo) { - MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); - if (!MO || MO->getSubReg()) - addOperand(MachineOperand::CreateReg(IncomingReg, - true /*IsDef*/, - true /*IsImp*/)); + if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) { + MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); + if (MO) + return; + } else { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() && + MO.getSubReg() == 0) + return; + } + } + addOperand(MachineOperand::CreateReg(IncomingReg, + true /*IsDef*/, + true /*IsImp*/)); } unsigned diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index b2e757d..6120617 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -738,8 +738,10 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, "Instructions with different phys regs are not identical!"); if (MO.isReg() && MO.isDef() && - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); + RegInfo->clearKillFlags(Dup->getOperand(i).getReg()); + } } MI->eraseFromParent(); ++NumCSEed; @@ -784,6 +786,15 @@ void MachineLICM::Hoist(MachineInstr *MI) { // Otherwise, splice the instruction to the preheader. CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); + // Clear the kill flags of any register this instruction defines, + // since they may need to be live throughout the entire loop + // rather than just live for part of it. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && !MO.isDead()) + RegInfo->clearKillFlags(MO.getReg()); + } + // Add to the CSE map. 
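MachineFunction::addLiveIn above now asks for an existing live-in vreg first (via the getLiveInVirtReg helper added in the MachineRegisterInfo.cpp hunk below), so calling addLiveIn twice for the same physical register hands back the same vreg instead of minting a duplicate, asserting if the register classes disagree. The shape of that memoization, with a plain std::map standing in for the live-in list:

    #include <cassert>
    #include <map>

    typedef unsigned PhysReg;
    typedef unsigned VirtReg;

    struct RegClass { const char *Name; };

    struct LiveInTable {
      std::map<PhysReg, VirtReg> LiveIns; // stands in for MRI's livein list
      std::map<VirtReg, const RegClass *> Classes;
      VirtReg NextVReg;

      LiveInTable() : NextVReg(1) {}

      VirtReg addLiveIn(PhysReg PReg, const RegClass *RC) {
        std::map<PhysReg, VirtReg>::iterator I = LiveIns.find(PReg);
        if (I != LiveIns.end()) {
          // Reuse the existing copy instead of creating a second vreg.
          assert(Classes[I->second] == RC && "Register class mismatch!");
          return I->second;
        }
        VirtReg V = NextVReg++;
        Classes[V] = RC;
        LiveIns[PReg] = V;
        return V;
      }
    };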
if (CI != CSEMap.end()) CI->second.push_back(MI); diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index ea5ca0c..70bf7e5 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -133,6 +133,15 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { return ++UI == use_nodbg_end(); } +/// clearKillFlags - Iterate over all the uses of the given register and +/// clear the kill flag from the MachineOperand. This function is used by +/// optimization passes which extend register lifetimes and need only +/// preserve conservative kill flag information. +void MachineRegisterInfo::clearKillFlags(unsigned Reg) const { + for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI) + UI.getOperand().setIsKill(false); +} + bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) if (I->first == Reg || I->second == Reg) @@ -156,6 +165,15 @@ unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const { return 0; } +/// getLiveInVirtReg - If PReg is a live-in physical register, return the +/// corresponding live-in virtual register. +unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const { + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) + if (I->first == PReg) + return I->second; + return 0; +} + static cl::opt<bool> SchedLiveInCopies("schedule-livein-copies", cl::Hidden, cl::desc("Schedule copies of livein registers"), @@ -218,7 +236,8 @@ static void EmitLiveInCopy(MachineBasicBlock *MBB, --Pos; } - bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC, + DebugLoc()); assert(Emitted && "Unable to issue a live-in copy instruction!\n"); (void) Emitted; @@ -253,7 +272,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, if (LI->second) { const TargetRegisterClass *RC = getRegClass(LI->second); bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC); + LI->second, LI->first, RC, RC, + DebugLoc()); assert(Emitted && "Unable to issue a live-in copy instruction!\n"); (void) Emitted; } @@ -265,6 +285,15 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, EntryMBB->addLiveIn(I->first); } +void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) { + for (int i = UsedPhysRegs.find_first(); i >= 0; + i = UsedPhysRegs.find_next(i)) + for (const unsigned *SS = TRI.getSubRegisters(i); + unsigned SubReg = *SS; ++SS) + if (SubReg > unsigned(i)) + UsedPhysRegs.set(SubReg); +} + #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index b8996d4..84d6df2 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -26,39 +26,17 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; -/// BBInfo - Per-basic block information used internally by MachineSSAUpdater. -class MachineSSAUpdater::BBInfo { -public: - MachineBasicBlock *BB; // Back-pointer to the corresponding block. - unsigned AvailableVal; // Value to use in this block. - BBInfo *DefBB; // Block that defines the available value.
- int BlkNum; // Postorder number. - BBInfo *IDom; // Immediate dominator. - unsigned NumPreds; // Number of predecessor blocks. - BBInfo **Preds; // Array[NumPreds] of predecessor blocks. - MachineInstr *PHITag; // Marker for existing PHIs that match. - - BBInfo(MachineBasicBlock *ThisBB, unsigned V) - : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0), - NumPreds(0), Preds(0), PHITag(0) { } -}; - -typedef DenseMap<MachineBasicBlock*, MachineSSAUpdater::BBInfo*> BBMapTy; - typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static BBMapTy *getBBMap(void *BM) { - return static_cast<BBMapTy*>(BM); -} - MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *NewPHI) - : AV(0), BM(0), InsertedPHIs(NewPHI) { + : AV(0), InsertedPHIs(NewPHI) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); } @@ -134,7 +112,8 @@ static MachineInstr *InsertNewDef(unsigned Opcode, MachineBasicBlock *BB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, - MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII) { unsigned NewVR = MRI->createVirtualRegister(RC); return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR); } @@ -263,438 +242,131 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { I->second = NewReg; } -/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry -/// for the specified BB and if so, return it. If not, construct SSA form by -/// first calculating the required placement of PHIs and then inserting new -/// PHIs where needed. -unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ - AvailableValsTy &AvailableVals = getAvailableVals(AV); - if (unsigned V = AvailableVals[BB]) - return V; +/// MachinePHIiter - Iterator for PHI operands. This is used for the +/// PHI_iterator in the SSAUpdaterImpl template. +namespace { + class MachinePHIiter { + private: + MachineInstr *PHI; + unsigned idx; + + public: + explicit MachinePHIiter(MachineInstr *P) // begin iterator + : PHI(P), idx(1) {} + MachinePHIiter(MachineInstr *P, bool) // end iterator + : PHI(P), idx(PHI->getNumOperands()) {} + + MachinePHIiter &operator++() { idx += 2; return *this; } + bool operator==(const MachinePHIiter& x) const { return idx == x.idx; } + bool operator!=(const MachinePHIiter& x) const { return !operator==(x); } + unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } + MachineBasicBlock *getIncomingBlock() { + return PHI->getOperand(idx+1).getMBB(); + } + }; +} - // Pool allocation used internally by GetValueAtEndOfBlock. - BumpPtrAllocator Allocator; - BBMapTy BBMapObj; - BM = &BBMapObj; +/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl +/// template, specialized for MachineSSAUpdater. 
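The deletions above and the specialization that follows replace MachineSSAUpdater's private dominator and PHI-placement code with the shared SSAUpdaterImpl template, which reaches machine-level types only through SSAUpdaterTraits: the generic algorithm names BlkT/ValT/PhiT and a few static hooks, nothing else. A toy instance of that traits pattern (deliberately not the real SSAUpdaterImpl interface):

    #include <iostream>

    template <typename UpdaterT> class SSAUpdaterTraits; // primary, never defined

    // A generic driver that only speaks through the traits.
    template <typename UpdaterT> struct UpdaterImpl {
      typedef SSAUpdaterTraits<UpdaterT> Traits;
      typedef typename Traits::BlkT BlkT;
      typedef typename Traits::ValT ValT;

      ValT valueForBlock(BlkT *BB, UpdaterT *U) {
        if (Traits::BlockHasValue(BB))
          return Traits::GetBlockValue(BB);
        return Traits::GetUndefVal(BB, U); // hook supplied by the client
      }
    };

    // One concrete client of the template.
    struct ToyUpdater {
      struct Block { bool HasVal; int Val; };
    };

    template <> class SSAUpdaterTraits<ToyUpdater> {
    public:
      typedef ToyUpdater::Block BlkT;
      typedef int ValT;
      static bool BlockHasValue(BlkT *BB) { return BB->HasVal; }
      static ValT GetBlockValue(BlkT *BB) { return BB->Val; }
      static ValT GetUndefVal(BlkT *, ToyUpdater *) { return -1; } // "undef"
    };

    int main() {
      ToyUpdater U;
      ToyUpdater::Block B = {true, 42};
      UpdaterImpl<ToyUpdater> Impl;
      std::cout << Impl.valueForBlock(&B, &U) << "\n"; // prints 42
    }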
+namespace llvm { +template<> +class SSAUpdaterTraits<MachineSSAUpdater> { +public: + typedef MachineBasicBlock BlkT; + typedef unsigned ValT; + typedef MachineInstr PhiT; + + typedef MachineBasicBlock::succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } + + typedef MachinePHIiter PHI_iterator; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } - SmallVector<BBInfo*, 100> BlockList; - BuildBlockList(BB, &BlockList, &Allocator); + /// FindPredecessorBlocks - Put the predecessors of BB into the Preds + /// vector. + static void FindPredecessorBlocks(MachineBasicBlock *BB, + SmallVectorImpl<MachineBasicBlock*> *Preds){ + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) + Preds->push_back(*PI); + } - // Special case: bail out if BB is unreachable. - if (BlockList.size() == 0) { - BM = 0; + /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register. + /// Add it into the specified block and return the register. + static unsigned GetUndefVal(MachineBasicBlock *BB, + MachineSSAUpdater *Updater) { // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), - VRC, MRI, TII); - unsigned V = NewDef->getOperand(0).getReg(); - AvailableVals[BB] = V; - return V; - } - - FindDominators(&BlockList); - FindPHIPlacement(&BlockList); - FindAvailableVals(&BlockList); - - BM = 0; - return BBMapObj[BB]->DefBB->AvailableVal; -} - -/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds -/// vector, set Info->NumPreds, and allocate space in Info->Preds. -static void FindPredecessorBlocks(MachineSSAUpdater::BBInfo *Info, - SmallVectorImpl<MachineBasicBlock*> *Preds, - BumpPtrAllocator *Allocator) { - MachineBasicBlock *BB = Info->BB; - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - E = BB->pred_end(); PI != E; ++PI) - Preds->push_back(*PI); - - Info->NumPreds = Preds->size(); - Info->Preds = static_cast<MachineSSAUpdater::BBInfo**> - (Allocator->Allocate(Info->NumPreds * sizeof(MachineSSAUpdater::BBInfo*), - AlignOf<MachineSSAUpdater::BBInfo*>::Alignment)); -} - -/// BuildBlockList - Starting from the specified basic block, traverse back -/// through its predecessors until reaching blocks with known values. Create -/// BBInfo structures for the blocks and append them to the block list. -void MachineSSAUpdater::BuildBlockList(MachineBasicBlock *BB, - BlockListTy *BlockList, - BumpPtrAllocator *Allocator) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - BBMapTy *BBMap = getBBMap(BM); - SmallVector<BBInfo*, 10> RootList; - SmallVector<BBInfo*, 64> WorkList; - - BBInfo *Info = new (*Allocator) BBInfo(BB, 0); - (*BBMap)[BB] = Info; - WorkList.push_back(Info); - - // Search backward from BB, creating BBInfos along the way and stopping when - // reaching blocks that define the value. Record those defining blocks on - // the RootList. - SmallVector<MachineBasicBlock*, 10> Preds; - while (!WorkList.empty()) { - Info = WorkList.pop_back_val(); - Preds.clear(); - FindPredecessorBlocks(Info, &Preds, Allocator); - - // Treat an unreachable predecessor as a definition with 'undef'. - if (Info->NumPreds == 0) { - // Insert an implicit_def to represent an undef value. 
- MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, - Info->BB, - Info->BB->getFirstTerminator(), - VRC, MRI, TII); - Info->AvailableVal = NewDef->getOperand(0).getReg(); - Info->DefBB = Info; - RootList.push_back(Info); - continue; - } - - for (unsigned p = 0; p != Info->NumPreds; ++p) { - MachineBasicBlock *Pred = Preds[p]; - // Check if BBMap already has a BBInfo for the predecessor block. - BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); - if (BBMapBucket.second) { - Info->Preds[p] = BBMapBucket.second; - continue; - } - - // Create a new BBInfo for the predecessor. - unsigned PredVal = AvailableVals.lookup(Pred); - BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); - BBMapBucket.second = PredInfo; - Info->Preds[p] = PredInfo; - - if (PredInfo->AvailableVal) { - RootList.push_back(PredInfo); - continue; - } - WorkList.push_back(PredInfo); - } + Updater->VRC, Updater->MRI, + Updater->TII); + return NewDef->getOperand(0).getReg(); } - // Now that we know what blocks are backwards-reachable from the starting - // block, do a forward depth-first traversal to assign postorder numbers - // to those blocks. - BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); - unsigned BlkNum = 1; - - // Initialize the worklist with the roots from the backward traversal. - while (!RootList.empty()) { - Info = RootList.pop_back_val(); - Info->IDom = PseudoEntry; - Info->BlkNum = -1; - WorkList.push_back(Info); + /// CreateEmptyPHI - Create a PHI instruction that defines a new register. + /// Add it into the specified block and return the register. + static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, + MachineSSAUpdater *Updater) { + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, + Updater->VRC, Updater->MRI, + Updater->TII); + return PHI->getOperand(0).getReg(); } - while (!WorkList.empty()) { - Info = WorkList.back(); - - if (Info->BlkNum == -2) { - // All the successors have been handled; assign the postorder number. - Info->BlkNum = BlkNum++; - // If not a root, put it on the BlockList. - if (!Info->AvailableVal) - BlockList->push_back(Info); - WorkList.pop_back(); - continue; - } - - // Leave this entry on the worklist, but set its BlkNum to mark that its - // successors have been put on the worklist. When it returns to the top - // the list, after handling its successors, it will be assigned a number. - Info->BlkNum = -2; - - // Add unvisited successors to the work list. - for (MachineBasicBlock::succ_iterator SI = Info->BB->succ_begin(), - E = Info->BB->succ_end(); SI != E; ++SI) { - BBInfo *SuccInfo = (*BBMap)[*SI]; - if (!SuccInfo || SuccInfo->BlkNum) - continue; - SuccInfo->BlkNum = -1; - WorkList.push_back(SuccInfo); - } + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(MachineInstr *PHI, unsigned Val, + MachineBasicBlock *Pred) { + PHI->addOperand(MachineOperand::CreateReg(Val, false)); + PHI->addOperand(MachineOperand::CreateMBB(Pred)); } - PseudoEntry->BlkNum = BlkNum; -} -/// IntersectDominators - This is the dataflow lattice "meet" operation for -/// finding dominators. Given two basic blocks, it walks up the dominator -/// tree until it finds a common dominator of both. It uses the postorder -/// number of the blocks to determine how to do that. 
-static MachineSSAUpdater::BBInfo * -IntersectDominators(MachineSSAUpdater::BBInfo *Blk1, - MachineSSAUpdater::BBInfo *Blk2) { - while (Blk1 != Blk2) { - while (Blk1->BlkNum < Blk2->BlkNum) { - Blk1 = Blk1->IDom; - if (!Blk1) - return Blk2; - } - while (Blk2->BlkNum < Blk1->BlkNum) { - Blk2 = Blk2->IDom; - if (!Blk2) - return Blk1; - } - } - return Blk1; -} - -/// FindDominators - Calculate the dominator tree for the subset of the CFG -/// corresponding to the basic blocks on the BlockList. This uses the -/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and -/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. -/// Because the CFG subset does not include any edges leading into blocks that -/// define the value, the results are not the usual dominator tree. The CFG -/// subset has a single pseudo-entry node with edges to a set of root nodes -/// for blocks that define the value. The dominators for this subset CFG are -/// not the standard dominators but they are adequate for placing PHIs within -/// the subset CFG. -void MachineSSAUpdater::FindDominators(BlockListTy *BlockList) { - bool Changed; - do { - Changed = false; - // Iterate over the list in reverse order, i.e., forward on CFG edges. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - // Start with the first predecessor. - assert(Info->NumPreds > 0 && "unreachable block"); - BBInfo *NewIDom = Info->Preds[0]; - - // Iterate through the block's other predecessors. - for (unsigned p = 1; p != Info->NumPreds; ++p) { - BBInfo *Pred = Info->Preds[p]; - NewIDom = IntersectDominators(NewIDom, Pred); - } - - // Check if the IDom value has changed. - if (NewIDom != Info->IDom) { - Info->IDom = NewIDom; - Changed = true; - } - } - } while (Changed); -} - -/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for -/// any blocks containing definitions of the value. If one is found, then the -/// successor of Pred is in the dominance frontier for the definition, and -/// this function returns true. -static bool IsDefInDomFrontier(const MachineSSAUpdater::BBInfo *Pred, - const MachineSSAUpdater::BBInfo *IDom) { - for (; Pred != IDom; Pred = Pred->IDom) { - if (Pred->DefBB == Pred) - return true; + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static MachineInstr *InstrIsPHI(MachineInstr *I) { + if (I && I->isPHI()) + return I; + return 0; } - return false; -} - -/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of -/// the known definitions. Iteratively add PHIs in the dom frontiers until -/// nothing changes. Along the way, keep track of the nearest dominating -/// definitions for non-PHI blocks. -void MachineSSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { - bool Changed; - do { - Changed = false; - // Iterate over the list in reverse order, i.e., forward on CFG edges. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - // If this block already needs a PHI, there is nothing to do here. - if (Info->DefBB == Info) - continue; - - // Default to use the same def as the immediate dominator. - BBInfo *NewDefBB = Info->IDom->DefBB; - for (unsigned p = 0; p != Info->NumPreds; ++p) { - if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { - // Need a PHI here. - NewDefBB = Info; - break; - } - } - // Check if anything changed. 
- if (NewDefBB != Info->DefBB) { - Info->DefBB = NewDefBB; - Changed = true; - } - } - } while (Changed); -} - -/// FindAvailableVal - If this block requires a PHI, first check if an existing -/// PHI matches the PHI placement and reaching definitions computed earlier, -/// and if not, create a new PHI. Visit all the block's predecessors to -/// calculate the available value for each one and fill in the incoming values -/// for a new PHI. -void MachineSSAUpdater::FindAvailableVals(BlockListTy *BlockList) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - - // Go through the worklist in forward order (i.e., backward through the CFG) - // and check if existing PHIs can be used. If not, create empty PHIs where - // they are needed. - for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); - I != E; ++I) { - BBInfo *Info = *I; - // Check if there needs to be a PHI in BB. - if (Info->DefBB != Info) - continue; - - // Look for an existing PHI. - FindExistingPHI(Info->BB, BlockList); - if (Info->AvailableVal) - continue; - - MachineBasicBlock::iterator Loc = - Info->BB->empty() ? Info->BB->end() : Info->BB->front(); - MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, Info->BB, Loc, - VRC, MRI, TII); - unsigned PHI = InsertedPHI->getOperand(0).getReg(); - Info->AvailableVal = PHI; - AvailableVals[Info->BB] = PHI; + /// ValueIsPHI - Check if the instruction that defines the specified register + /// is a PHI instruction. + static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) { + return InstrIsPHI(Updater->MRI->getVRegDef(Val)); } - // Now go back through the worklist in reverse order to fill in the arguments - // for any new PHIs added in the forward traversal. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - if (Info->DefBB != Info) { - // Record the available value at join nodes to speed up subsequent - // uses of this SSAUpdater for the same value. - if (Info->NumPreds > 1) - AvailableVals[Info->BB] = Info->DefBB->AvailableVal; - continue; - } - - // Check if this block contains a newly added PHI. - unsigned PHI = Info->AvailableVal; - MachineInstr *InsertedPHI = MRI->getVRegDef(PHI); - if (!InsertedPHI->isPHI() || InsertedPHI->getNumOperands() > 1) - continue; - - // Iterate through the block's predecessors. - MachineInstrBuilder MIB(InsertedPHI); - for (unsigned p = 0; p != Info->NumPreds; ++p) { - BBInfo *PredInfo = Info->Preds[p]; - MachineBasicBlock *Pred = PredInfo->BB; - // Skip to the nearest preceding definition. - if (PredInfo->DefBB != PredInfo) - PredInfo = PredInfo->DefBB; - MIB.addReg(PredInfo->AvailableVal).addMBB(Pred); - } - - DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); - - // If the client wants to know about all new instructions, tell it. - if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. + static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) { + MachineInstr *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumOperands() <= 1) + return PHI; + return 0; } -} -/// FindExistingPHI - Look through the PHI nodes in a block to see if any of -/// them match what is needed. 
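ValueIsNewPHI above leans on an operand-count invariant: a PHI fresh out of CreateEmptyPHI carries only its def operand until AddPHIOperand appends (value, predecessor) pairs. Stated as a stand-alone predicate (a sketch against the MachineInstr API; the helper name is hypothetical, not code from this patch):

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// A machine PHI defines its register in operand 0; incoming values only
// appear once AddPHIOperand has run, so "no source operands yet" reduces to
// an operand count of at most one.
static bool isFreshlyInsertedPHI(const MachineInstr *MI) {
  return MI->isPHI() && MI->getNumOperands() <= 1;
}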
-void MachineSSAUpdater::FindExistingPHI(MachineBasicBlock *BB, - BlockListTy *BlockList) { - for (MachineBasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - if (CheckIfPHIMatches(BBI)) { - RecordMatchingPHI(BBI); - break; - } - // Match failed: clear all the PHITag values. - for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); - I != E; ++I) - (*I)->PHITag = 0; + /// GetPHIValue - For the specified PHI instruction, return the register + /// that it defines. + static unsigned GetPHIValue(MachineInstr *PHI) { + return PHI->getOperand(0).getReg(); } -} - -/// CheckIfPHIMatches - Check if a PHI node matches the placement and values -/// in the BBMap. -bool MachineSSAUpdater::CheckIfPHIMatches(MachineInstr *PHI) { - BBMapTy *BBMap = getBBMap(BM); - SmallVector<MachineInstr*, 20> WorkList; - WorkList.push_back(PHI); - - // Mark that the block containing this PHI has been visited. - (*BBMap)[PHI->getParent()]->PHITag = PHI; - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) { - unsigned IncomingVal = PHI->getOperand(i).getReg(); - BBInfo *PredInfo = (*BBMap)[PHI->getOperand(i+1).getMBB()]; - // Skip to the nearest preceding definition. - if (PredInfo->DefBB != PredInfo) - PredInfo = PredInfo->DefBB; - - // Check if it matches the expected value. - if (PredInfo->AvailableVal) { - if (IncomingVal == PredInfo->AvailableVal) - continue; - return false; - } - - // Check if the value is a PHI in the correct block. - MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal); - if (!IncomingPHIVal->isPHI() || - IncomingPHIVal->getParent() != PredInfo->BB) - return false; - - // If this block has already been visited, check if this PHI matches. - if (PredInfo->PHITag) { - if (IncomingPHIVal == PredInfo->PHITag) - continue; - return false; - } - PredInfo->PHITag = IncomingPHIVal; +}; - WorkList.push_back(IncomingPHIVal); - } - } - return true; -} +} // End llvm namespace -/// RecordMatchingPHI - For a PHI node that matches, record it and its input -/// PHIs in both the BBMap and the AvailableVals mapping. -void MachineSSAUpdater::RecordMatchingPHI(MachineInstr *PHI) { - BBMapTy *BBMap = getBBMap(BM); +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. +unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ AvailableValsTy &AvailableVals = getAvailableVals(AV); - SmallVector<MachineInstr*, 20> WorkList; - WorkList.push_back(PHI); - - // Record this PHI. - MachineBasicBlock *BB = PHI->getParent(); - AvailableVals[BB] = PHI->getOperand(0).getReg(); - (*BBMap)[BB]->AvailableVal = PHI->getOperand(0).getReg(); - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) { - unsigned IncomingVal = PHI->getOperand(i).getReg(); - MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal); - if (!IncomingPHIVal->isPHI()) continue; - BB = IncomingPHIVal->getParent(); - BBInfo *Info = (*BBMap)[BB]; - if (!Info || Info->AvailableVal) - continue; - - // Record the PHI and add it to the worklist. 
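Both the matching code being deleted here and the new trait helpers step through a PHI's operands in (value, block) pairs starting at index 1, with operand 0 being the def that GetPHIValue returns. A short sketch of walking that layout (printPHIIncoming is a hypothetical helper for illustration, not part of this patch):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// For a PHI of the form
//   %reg1031 = PHI %reg1029, <BB#1>, %reg1030, <BB#2>
// print each incoming value together with its predecessor block.
static void printPHIIncoming(const MachineInstr *PHI, raw_ostream &OS) {
  OS << "def: %reg" << PHI->getOperand(0).getReg() << '\n';
  for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2)
    OS << "  %reg" << PHI->getOperand(i).getReg()
       << " from BB#" << PHI->getOperand(i+1).getMBB()->getNumber() << '\n';
}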
-      AvailableVals[BB] = IncomingVal;
-      Info->AvailableVal = IncomingVal;
-      WorkList.push_back(IncomingPHIVal);
-    }
-  }
+  if (unsigned V = AvailableVals[BB])
+    return V;
+
+  SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+  return Impl.GetValue(BB);
 }
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index ef489dc..1610e6c 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -314,5 +314,10 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
   // Move the instruction.
   SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
                        ++MachineBasicBlock::iterator(MI));
+
+  // Conservatively, clear any kill flags, since it's possible that
+  // they are no longer correct.
+  MI->clearKillInfo();
+
   return true;
 }
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 0b75c55..8baf01c 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -47,7 +47,7 @@ namespace {
     MachineVerifier(Pass *pass, bool allowDoubleDefs) :
       PASS(pass),
       allowVirtDoubleDefs(allowDoubleDefs),
-      allowPhysDoubleDefs(allowDoubleDefs),
+      allowPhysDoubleDefs(true),
       OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
       {}
@@ -552,19 +552,23 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
     regsLiveInButUnused.erase(Reg);
 
     bool isKill = false;
-    if (MO->isKill()) {
-      isKill = true;
-      // Tied operands on two-address instructions MUST NOT have a <kill> flag.
-      if (MI->isRegTiedToDefOperand(MONum))
+    unsigned defIdx;
+    if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+      // A two-addr use counts as a kill if use and def are the same.
+      unsigned DefReg = MI->getOperand(defIdx).getReg();
+      if (Reg == DefReg) {
+        isKill = true;
+        // And in that case an explicit kill flag is not allowed.
+        if (MO->isKill())
          report("Illegal kill flag on two-address instruction operand",
                 MO, MONum);
-    } else {
-      // TwoAddress instr modifying a reg is treated as kill+def.
-      unsigned defIdx;
-      if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
-          MI->getOperand(defIdx).getReg() == Reg)
-        isKill = true;
-    }
+      } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+        report("Two-address instruction operands must be identical",
+               MO, MONum);
+      }
+    } else
+      isKill = MO->isKill();
+
     if (isKill) {
       addRegWithSubRegs(regsKilled, Reg);
@@ -631,11 +635,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
    // Virtual register.
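For reference, the public MachineSSAUpdater interface sitting on top of GetValueAtEndOfBlockInternal is unchanged by this rewrite; only the internals now go through SSAUpdaterImpl. A typical client looks roughly like this (a sketch against the existing API; the blocks and registers are hypothetical):

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
using namespace llvm;

// After introducing extra definitions of OldReg's value, rewrite the
// remaining uses so the function is in SSA form again. PHIs are created on
// demand through the SSAUpdaterTraits hooks defined above.
static void rewriteUses(MachineFunction &MF, MachineRegisterInfo &MRI,
                        unsigned OldReg,
                        MachineBasicBlock *DefBB1, unsigned NewReg1,
                        MachineBasicBlock *DefBB2, unsigned NewReg2) {
  MachineSSAUpdater Updater(MF);
  Updater.Initialize(OldReg);
  Updater.AddAvailableValue(DefBB1, NewReg1);
  Updater.AddAvailableValue(DefBB2, NewReg2);
  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
         UE = MRI.use_end(); UI != UE; ) {
    MachineOperand &MO = UI.getOperand();
    ++UI; // advance first; RewriteUse may touch the use list
    Updater.RewriteUse(MO);
  }
}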
const TargetRegisterClass *RC = MRI->getRegClass(Reg); if (SubIdx) { - if (RC->subregclasses_begin()+SubIdx >= RC->subregclasses_end()) { + const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx); + if (!SRC) { report("Invalid subregister index for virtual register", MO, MONum); + *OS << "Register class " << RC->getName() + << " does not support subreg index " << SubIdx << "\n"; return; } - RC = *(RC->subregclasses_begin()+SubIdx); + RC = SRC; } if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { if (RC != DRC && !RC->hasSuperClass(DRC)) { diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 1651719..edbc13f 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -27,10 +27,8 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include <algorithm> #include <map> using namespace llvm; @@ -88,10 +86,6 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) { ImpDefs.clear(); VRegPHIUseCount.clear(); - // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve - // SSA form. - Changed |= EliminateRegSequences(MF); - return Changed; } @@ -216,7 +210,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( } else { entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } - TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC); + TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC, + MPhi->getDebugLoc()); } // Update live variable information if there is any. @@ -298,7 +293,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Insert the copy. if (!reusedIncoming && IncomingReg) - TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC); + TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC, + MPhi->getDebugLoc()); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; @@ -449,58 +445,3 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, return NMBB; } - -static void UpdateRegSequenceSrcs(unsigned SrcReg, - unsigned DstReg, unsigned SrcIdx, - MachineRegisterInfo *MRI) { - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - UE = MRI->reg_end(); RI != UE; ) { - MachineOperand &MO = RI.getOperand(); - ++RI; - MO.setReg(DstReg); - MO.setSubReg(SrcIdx); - } -} - -/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as second part -/// of de-ssa process. This replaces sources of REG_SEQUENCE as sub-register -/// references of the register defined by REG_SEQUENCE. e.g. -/// -/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ... -/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6 -/// => -/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ... 
-bool PHIElimination::EliminateRegSequences(MachineFunction &MF) { - bool Changed = false; - - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE; ) { - MachineInstr &MI = *BBI; - ++BBI; - if (MI.getOpcode() != TargetOpcode::REG_SEQUENCE) - continue; - unsigned DstReg = MI.getOperand(0).getReg(); - if (MI.getOperand(0).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(DstReg) || - !(MI.getNumOperands() & 1)) { - DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); - llvm_unreachable(0); - } - for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) { - unsigned SrcReg = MI.getOperand(i).getReg(); - if (MI.getOperand(i).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); - llvm_unreachable(0); - } - unsigned SrcIdx = MI.getOperand(i+1).getImm(); - UpdateRegSequenceSrcs(SrcReg, DstReg, SrcIdx, MRI); - } - - MI.eraseFromParent(); - Changed = true; - } - - return Changed; -} diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index 3292aa2..7dedf03 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -94,8 +94,6 @@ namespace llvm { return I; } - bool EliminateRegSequences(MachineFunction &MF); - typedef std::pair<unsigned, unsigned> BBVRegPair; typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index d3e1295..9714ea6 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -114,7 +114,7 @@ namespace { /// AvailableQueue - The priority queue to use for the available SUnits. /// LatencyPriorityQueue AvailableQueue; - + /// PendingQueue - This contains all of the instructions whose operands have /// been issued, but their results are not ready yet (due to the latency of /// the operation). Once the operands becomes available, the instruction is @@ -158,7 +158,7 @@ namespace { /// Schedule - Schedule the instruction range using list scheduling. /// void Schedule(); - + /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// @@ -179,7 +179,7 @@ namespace { void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); void StartBlockForKills(MachineBasicBlock *BB); - + // ToggleKillFlag - Toggle a register operand kill flag. Other // adjustments may be made to the instruction if necessary. Return // true if the operand has been deleted, false if not. @@ -197,13 +197,13 @@ static bool isSchedulingBoundary(const MachineInstr *MI, if (MI->getDesc().isTerminator() || MI->isLabel()) return true; - // Don't attempt to schedule around any instruction that modifies + // Don't attempt to schedule around any instruction that defines // a stack-oriented pointer, as it's unlikely to be profitable. This // saves compile time, because it doesn't require every single // stack slot reference to depend on the instruction that does the // modification. const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); - if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore())) + if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) return true; return false; @@ -227,9 +227,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Check for antidep breaking override... 
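The isSchedulingBoundary() change above tightens the stack-pointer test from modifiesRegister() to definesRegister(): only an actual definition of the stack pointer now ends a scheduling region, matching the updated comment. Condensed into a sketch (SPReg standing for the target's stack pointer, e.g. the value returned by getStackPointerRegisterToSaveRestore()):

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Nothing is scheduled across a boundary instruction.
static bool isBoundary(const MachineInstr *MI, unsigned SPReg) {
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;
  // Only a def of SP ends the region; this keeps every stack slot reference
  // from having to depend on the instruction performing the adjustment.
  return MI->definesRegister(SPReg);
}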
if (EnableAntiDepBreaking.getPosition() > 0) { - AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtarget::ANTIDEP_ALL : - (EnableAntiDepBreaking == "critical") ? TargetSubtarget::ANTIDEP_CRITICAL : - TargetSubtarget::ANTIDEP_NONE; + AntiDepMode = (EnableAntiDepBreaking == "all") ? + TargetSubtarget::ANTIDEP_ALL : + (EnableAntiDepBreaking == "critical") + ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE; } DEBUG(dbgs() << "PostRAScheduler\n"); @@ -240,10 +241,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); - AntiDepBreaker *ADB = + AntiDepBreaker *ADB = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); @@ -265,17 +266,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Initialize register live-range state for scheduling in this block. Scheduler.StartBlock(MBB); - // FIXME: Temporary workaround for <rdar://problem/7759363>: The post-RA - // scheduler has some sort of problem with DebugValue instructions that - // causes an assertion in LeaksContext.h to fail occasionally. Just - // remove all those instructions for now. - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ) { - MachineInstr *MI = &*I++; - if (MI->isDebugValue()) - MI->eraseFromParent(); - } - // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. MachineBasicBlock::iterator Current = MBB->end(); @@ -310,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { return true; } - + /// StartBlock - Initialize register live-range state for scheduling in /// this block. /// @@ -331,10 +321,10 @@ void SchedulePostRATDList::Schedule() { BuildSchedGraph(AA); if (AntiDepBreak != NULL) { - unsigned Broken = + unsigned Broken = AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, InsertPosIndex); - + if (Broken != 0) { // We made changes. Update the dependency graph. // Theoretically we could update the graph in place: @@ -347,7 +337,7 @@ void SchedulePostRATDList::Schedule() { EntrySU = SUnit(); ExitSU = SUnit(); BuildSchedGraph(AA); - + NumFixedAnti += Broken; } } @@ -425,7 +415,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, MO.setIsKill(true); return false; } - + // If MO itself is live, clear the kill flag... if (KillIndices[MO.getReg()] != ~0u) { MO.setIsKill(false); @@ -464,7 +454,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { BitVector ReservedRegs = TRI->getReservedRegs(MF); StartBlockForKills(MBB); - + // Examine block from end to start... unsigned Count = MBB->size(); for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); @@ -484,9 +474,9 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { if (!MO.isDef()) continue; // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - + KillIndices[Reg] = ~0u; - + // Repeat for all subregs. 
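The anti-dependence breaking wired up above targets write-after-read hazards: an instruction that writes a register some earlier instruction still needs to read cannot be hoisted past it, and renaming the later def dissolves the edge. A toy model with plain integers standing in for registers (not LLVM API):

#include <cstdio>

// B has an anti-dependence (WAR) on A when B writes a register that A reads.
struct ToyInst { unsigned def, use0, use1; };

static bool hasAntiDep(const ToyInst &A, const ToyInst &B) {
  return B.def == A.use0 || B.def == A.use1;
}

int main() {
  ToyInst A  = {1, 0, 5}; // r1 = f(r0, r5)
  ToyInst B  = {0, 2, 3}; // r0 = g(r2, r3): WAR edge, B may not move above A
  ToyInst B2 = {4, 2, 3}; // r4 = g(r2, r3): def renamed, edge gone
  std::printf("A->B: %d  A->B2: %d\n", hasAntiDep(A, B), hasAntiDep(A, B2));
  return 0;
}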
for (const unsigned *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { @@ -521,17 +511,17 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { if (kill) kill = (KillIndices[Reg] == ~0u); } - + if (MO.isKill() != kill) { DEBUG(dbgs() << "Fixing " << MO << " in "); // Warning: ToggleKillFlag may invalidate MO. ToggleKillFlag(MI, MO); DEBUG(MI->dump()); } - + killedRegs.insert(Reg); } - + // Mark any used register (that is not using undef) and subregs as // now live... for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -541,7 +531,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { if ((Reg == 0) || ReservedRegs.test(Reg)) continue; KillIndices[Reg] = Count; - + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { KillIndices[*Subreg] = Count; @@ -573,7 +563,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { // available. This is the max of the start time of all predecessors plus // their latencies. SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); - + // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) @@ -594,9 +584,9 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); - + Sequence.push_back(SU); - assert(CurCycle >= SU->getDepth() && + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); @@ -609,7 +599,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { /// schedulers. void SchedulePostRATDList::ListScheduleTopDown() { unsigned CurCycle = 0; - + // We're scheduling top-down but we're visiting the regions in // bottom-up order, so we don't know the hazards at the start of a // region. So assume no hazards (this should usually be ok as most diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 2d49beb..96e7327 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -882,7 +882,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, !RefsInMBB.count(FoldPt)) --FoldPt; - int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg, false); + int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg); if (OpIdx == -1) return 0; @@ -1061,7 +1061,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // Add spill. SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC); + TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC, + TRI); SpillMI = prior(SpillPt); SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); } @@ -1097,7 +1098,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Add spill. 
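FixupKills above recomputes kill flags by walking each block from the bottom up: the first use of a register met in that walk is its last use in program order, and therefore the kill. The core idea in isolation (toy data structures, not LLVM API; the real code additionally handles subregisters, reserved registers, and undef uses):

#include <map>
#include <vector>

struct ToyUse { unsigned Reg; unsigned InstIdx; };

// Scan uses bottom-up; the first sighting of each register is its kill.
std::map<unsigned, unsigned> findKills(const std::vector<ToyUse> &Uses) {
  std::map<unsigned, unsigned> KillAt;
  for (std::vector<ToyUse>::const_reverse_iterator I = Uses.rbegin(),
         E = Uses.rend(); I != E; ++I)
    if (!KillAt.count(I->Reg))
      KillAt[I->Reg] = I->InstIdx;
  return KillAt;
}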
SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC); + TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC, + TRI); SpillMI = prior(SpillPt); SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); } @@ -1116,7 +1118,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { RestoreIndex = LIs->getInstructionIndex(RestorePt); FoldedRestore = true; } else { - TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC); + TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI); MachineInstr *LoadMI = prior(RestorePt); RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI); } @@ -1152,7 +1154,7 @@ PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, // codegen is not modelling. Ignore these barriers for now. if (!TII->isSafeToMoveRegClassDefs(*RC)) continue; - std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC); + const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC); for (unsigned i = 0, e = VRs.size(); i != e; ++i) { unsigned Reg = VRs[i]; if (!LIs->hasInterval(Reg)) diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index d7179b3..62f525f 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -46,7 +46,7 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, const TargetInstrInfo *tii_) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg) + Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) return true; if (OpIdx == 2 && MI->isSubregToReg()) @@ -89,6 +89,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *MI = &*I; ++I; if (MI->isImplicitDef()) { + if (MI->getOperand(0).getSubReg()) + continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -218,7 +220,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Turn a copy use into an implicit_def. unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg) { + Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) { RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index a454b62..e778024 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -58,8 +58,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); FrameConstantRegMap.clear(); - // Calculate the MaxCallFrameSize and HasCalls variables for the function's - // frame information. Also eliminates call frame pseudo instructions. + // Calculate the MaxCallFrameSize and AdjustsStack variables for the + // function's frame information. Also eliminates call frame pseudo + // instructions. calculateCallsInformation(Fn); // Allow the target machine to make some adjustments to the function @@ -91,8 +92,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Add prolog and epilog code to the function. This function is required // to align the stack frame as necessary for any stack variables or - // called functions. 
 Because of this, calculateCalleeSavedRegisters
-  // must be called before this function in order to set the HasCalls
+  // called functions. Because of this, calculateCalleeSavedRegisters()
+  // must be called before this function in order to set the AdjustsStack
   // and MaxCallFrameSize variables.
   if (!F->hasFnAttr(Attribute::Naked))
     insertPrologEpilogCode(Fn);
@@ -126,7 +127,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 #endif
 
-/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
 /// variables for the function's frame information and eliminate call frame
 /// pseudo instructions.
 void PEI::calculateCallsInformation(MachineFunction &Fn) {
@@ -134,7 +135,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
   MachineFrameInfo *MFI = Fn.getFrameInfo();
 
   unsigned MaxCallFrameSize = 0;
-  bool HasCalls = MFI->hasCalls();
+  bool AdjustsStack = MFI->adjustsStack();
 
   // Get the function call frame set-up and tear-down instruction opcode
   int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
@@ -154,15 +155,15 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
              " instructions should have a single immediate argument!");
       unsigned Size = I->getOperand(0).getImm();
       if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
-      HasCalls = true;
+      AdjustsStack = true;
       FrameSDOps.push_back(I);
     } else if (I->isInlineAsm()) {
       // An InlineAsm might be a call; assume it is to get the stack frame
       // aligned correctly for calls.
-      HasCalls = true;
+      AdjustsStack = true;
     }
 
-  MFI->setHasCalls(HasCalls);
+  MFI->setAdjustsStack(AdjustsStack);
   MFI->setMaxCallFrameSize(MaxCallFrameSize);
 
   for (std::vector<MachineBasicBlock::iterator>::iterator
@@ -289,12 +290,13 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
     return;
 
   const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
   MachineBasicBlock::iterator I;
 
   if (! ShrinkWrapThisFunction) {
     // Spill using target interface.
     I = EntryBlock->begin();
-    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
+    if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
       for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
         // Add the callee-saved register as live-in.
         // It's killed at the spill.
@@ -302,7 +304,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
 
         // Insert the spill to the stack frame.
         TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
-                                CSI[i].getFrameIdx(), CSI[i].getRegClass());
+                                CSI[i].getFrameIdx(), CSI[i].getRegClass(),TRI);
       }
     }
 
@@ -324,11 +326,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
 
       // Restore all registers immediately before the return and any
      // terminators that precede it.
-      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+      if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
        for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
          TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
                                   CSI[i].getFrameIdx(),
-                                   CSI[i].getRegClass());
+                                   CSI[i].getRegClass(), TRI);
          assert(I != MBB->begin() &&
                 "loadRegFromStackSlot didn't insert any code!");
          // Insert in reverse order.
loadRegFromStackSlot can insert @@ -375,7 +377,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), true, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass()); + blockCSI[i].getRegClass(), TRI); } } @@ -423,7 +425,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass()); + blockCSI[i].getRegClass(), TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -576,7 +578,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) + if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has @@ -585,13 +587,14 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (MFI->hasCalls() || MFI->hasVarSizedObjects() || + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); - // If the frame pointer is eliminated, all frame offsets will be relative - // to SP not FP; align to MaxAlign so this works. + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); @@ -601,7 +604,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { MFI->setStackSize(Offset - LocalAreaOffset); } - /// insertPrologEpilogCode - Scan the function for modified callee saved /// registers, insert spill code for these callee saved registers, then add /// prolog and epilog code to the function. @@ -620,7 +622,6 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { } } - /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. 
///
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 2caf1df..b3b5760 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -18,7 +18,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -38,6 +37,7 @@ using namespace llvm;
 
 STATISTIC(NumStores, "Number of stores added");
 STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
 
 static RegisterRegAlloc
   fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -46,77 +46,80 @@ namespace {
   class RAFast : public MachineFunctionPass {
   public:
     static char ID;
-    RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
+    RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1),
+               isBulkSpilling(false) {}
   private:
    const TargetMachine *TM;
    MachineFunction *MF;
+    MachineRegisterInfo *MRI;
    const TargetRegisterInfo *TRI;
    const TargetInstrInfo *TII;
 
+    // Basic block currently being allocated.
+    MachineBasicBlock *MBB;
+
    // StackSlotForVirtReg - Maps virtual regs to the frame index where these
    // values are spilled.
    IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
 
-    // Virt2PhysRegMap - This map contains entries for each virtual register
+    // Everything we know about a live virtual register.
+    struct LiveReg {
+      MachineInstr *LastUse;    // Last instr to use reg.
+      unsigned PhysReg;         // Currently held here.
+      unsigned short LastOpNum; // OpNum on LastUse.
+      bool Dirty;               // Register needs spill.
+
+      LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0),
+                              Dirty(false) {}
+    };
+
+    typedef DenseMap<unsigned, LiveReg> LiveRegMap;
+    typedef LiveRegMap::value_type LiveRegEntry;
+
+    // LiveVirtRegs - This map contains entries for each virtual register
    // that is currently available in a physical register.
-    IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+    LiveRegMap LiveVirtRegs;
 
-    unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
-      return Virt2PhysRegMap[VirtReg];
-    }
+    // RegState - Track the state of a physical register.
+    enum RegState {
+      // A disabled register is not available for allocation, but an alias may
+      // be in use. A register can only be moved out of the disabled state if
+      // all aliases are disabled.
+      regDisabled,
 
-    // PhysRegsUsed - This array is effectively a map, containing entries for
-    // each physical register that currently has a value (ie, it is in
-    // Virt2PhysRegMap). The value mapped to is the virtual register
-    // corresponding to the physical register (the inverse of the
-    // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
-    // because it is used by a future instruction, and to -2 if it is not
-    // allocatable. If the entry for a physical register is -1, then the
-    // physical register is "not in the map".
-    //
-    std::vector<int> PhysRegsUsed;
+      // A free register is not currently in use and can be allocated
+      // immediately without checking aliases.
+      regFree,
+
+      // A reserved register has been assigned explicitly (e.g., setting up a
+      // call parameter), and it remains reserved until it is used.
+      regReserved
+
+      // A register state may also be a virtual register number, indicating that
+      // the physical register is currently allocated to a virtual register. 
In + // that case, LiveVirtRegs contains the inverse mapping. + }; + + // PhysRegState - One of the RegState enums, or a virtreg. + std::vector<unsigned> PhysRegState; // UsedInInstr - BitVector of physregs that are used in the current // instruction, and so cannot be allocated. BitVector UsedInInstr; - // Virt2LastUseMap - This maps each virtual register to its last use - // (MachineInstr*, operand index pair). - IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor> - Virt2LastUseMap; - - std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - return Virt2LastUseMap[Reg]; - } - - // VirtRegModified - This bitset contains information about which virtual - // registers need to be spilled back to memory when their registers are - // scavenged. If a virtual register has simply been rematerialized, there - // is no reason to spill it to memory when we need the register back. - // - BitVector VirtRegModified; - - // UsedInMultipleBlocks - Tracks whether a particular register is used in - // more than one block. - BitVector UsedInMultipleBlocks; - - void markVirtRegModified(unsigned Reg, bool Val = true) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - Reg -= TargetRegisterInfo::FirstVirtualRegister; - if (Val) - VirtRegModified.set(Reg); - else - VirtRegModified.reset(Reg); - } + // Allocatable - vector of allocatable physical registers. + BitVector Allocatable; - bool isVirtRegModified(unsigned Reg) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - assert(Reg - TargetRegisterInfo::FirstVirtualRegister < - VirtRegModified.size() && "Illegal virtual register!"); - return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; - } + // isBulkSpilling - This flag is set when LiveRegMap will be cleared + // completely after spilling all live registers. LiveRegMap entries should + // not be erased. + bool isBulkSpilling; + enum { + spillClean = 1, + spillDirty = 100, + spillImpossible = ~0u + }; public: virtual const char *getPassName() const { return "Fast Register Allocator"; @@ -124,104 +127,34 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<LiveVariables>(); AU.addRequiredID(PHIEliminationID); AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } private: - /// runOnMachineFunction - Register allocate the whole function bool runOnMachineFunction(MachineFunction &Fn); - - /// AllocateBasicBlock - Register allocate the specified basic block. - void AllocateBasicBlock(MachineBasicBlock &MBB); - - - /// areRegsEqual - This method returns true if the specified registers are - /// related to each other. To do this, it checks to see if they are equal - /// or if the first register is in the alias set of the second register. - /// - bool areRegsEqual(unsigned R1, unsigned R2) const { - if (R1 == R2) return true; - for (const unsigned *AliasSet = TRI->getAliasSet(R2); - *AliasSet; ++AliasSet) { - if (*AliasSet == R1) return true; - } - return false; - } - - /// getStackSpaceFor - This returns the frame index of the specified virtual - /// register on the stack, allocating space if necessary. + void AllocateBasicBlock(); int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); - - /// removePhysReg - This method marks the specified physical register as no - /// longer being in use. 
- /// - void removePhysReg(unsigned PhysReg); - - /// spillVirtReg - This method spills the value specified by PhysReg into - /// the virtual register slot specified by VirtReg. It then updates the RA - /// data structures to indicate the fact that PhysReg is now available. - /// - void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned VirtReg, unsigned PhysReg); - - /// spillPhysReg - This method spills the specified physical register into - /// the virtual register slot associated with it. If OnlyVirtRegs is set to - /// true, then the request is ignored if the physical register does not - /// contain a virtual register. - /// - void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs = false); - - /// assignVirtToPhysReg - This method updates local state so that we know - /// that PhysReg is the proper container for VirtReg now. The physical - /// register must not be used for anything else when this is called. - /// - void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); - - /// isPhysRegAvailable - Return true if the specified physical register is - /// free and available for use. This also includes checking to see if - /// aliased registers are all free... - /// - bool isPhysRegAvailable(unsigned PhysReg) const; - - /// isPhysRegSpillable - Can PhysReg be freed by spilling? - bool isPhysRegSpillable(unsigned PhysReg) const; - - /// getFreeReg - Look to see if there is a free register available in the - /// specified register class. If not, return 0. - /// - unsigned getFreeReg(const TargetRegisterClass *RC); - - /// getReg - Find a physical register to hold the specified virtual - /// register. If all compatible physical registers are used, this method - /// spills the last used virtual register to the stack, and uses that - /// register. If NoFree is true, that means the caller knows there isn't - /// a free register, do not call getFreeReg(). - unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg, bool NoFree = false); - - /// reloadVirtReg - This method transforms the specified virtual - /// register use to refer to a physical register. This method may do this - /// in one of several ways: if the register is available in a physical - /// register already, it uses that physical register. If the value is not - /// in a physical register, and if there are physical registers available, - /// it loads it into a register: PhysReg if that is an available physical - /// register, otherwise any physical register of the right class. - /// If register pressure is high, and it is possible, it tries to fold the - /// load of the virtual register into the instruction itself. It avoids - /// doing this if register pressure is low to improve the chance that - /// subsequent instructions can use the reloaded value. This method - /// returns the modified instruction. 
- /// - MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, SmallSet<unsigned, 4> &RRegs, - unsigned PhysReg); - - void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, - unsigned PhysReg); + bool isLastUseOfLocalReg(MachineOperand&); + + void addKillFlag(const LiveReg&); + void killVirtReg(LiveRegMap::iterator); + void killVirtReg(unsigned VirtReg); + void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator); + void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); + + void usePhysReg(MachineOperand&); + void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); + unsigned calcSpillCost(unsigned PhysReg) const; + void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg); + void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint); + LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); + LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); + void spillAll(MachineInstr *MI); + bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); }; char RAFast::ID = 0; } @@ -243,687 +176,668 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { return FrameIdx; } - -/// removePhysReg - This method marks the specified physical register as no -/// longer being in use. -/// -void RAFast::removePhysReg(unsigned PhysReg) { - PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used -} - - -/// spillVirtReg - This method spills the value specified by PhysReg into the -/// virtual register slot specified by VirtReg. It then updates the RA data -/// structures to indicate the fact that PhysReg is now available. +/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to +/// its virtual register, and it is guaranteed to be a block-local register. /// -void RAFast::spillVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg) { - assert(VirtReg && "Spilling a physical register is illegal!" - " Must not have appropriate kill for the register or use exists beyond" - " the intended one."); - DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) - << " containing %reg" << VirtReg); - - if (!isVirtRegModified(VirtReg)) { - DEBUG(dbgs() << " which has not been modified, so no store necessary!"); - std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg); - if (LastUse.first) - LastUse.first->getOperand(LastUse.second).setIsKill(); - } else { - // Otherwise, there is a virtual register corresponding to this physical - // register. We only need to spill it into its stack slot if it has been - // modified. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex); - // If the instruction reads the register that's spilled, (e.g. this can - // happen if it is a move to a physical register), then the spill - // instruction is not a kill. - bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); - ++NumStores; // Update statistics - } +bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { + // Check for non-debug uses or defs following MO. + // This is the most likely way to fail - fast path it. 
+ MachineOperand *Next = &MO; + while ((Next = Next->getNextOperandForReg())) + if (!Next->isDebug()) + return false; - getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available + // If the register has ever been spilled or reloaded, we conservatively assume + // it is a global register used in multiple blocks. + if (StackSlotForVirtReg[MO.getReg()] != -1) + return false; - DEBUG(dbgs() << '\n'); - removePhysReg(PhysReg); + // Check that the use/def chain has exactly one operand - MO. + return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; } - -/// spillPhysReg - This method spills the specified physical register into the -/// virtual register slot associated with it. If OnlyVirtRegs is set to true, -/// then the request is ignored if the physical register does not contain a -/// virtual register. -/// -void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs) { - if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! - assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); - if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) - spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); - return; +/// addKillFlag - Set kill flags on last use of a virtual register. +void RAFast::addKillFlag(const LiveReg &LR) { + if (!LR.LastUse) return; + MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); + if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { + if (MO.getReg() == LR.PhysReg) + MO.setIsKill(); + else + LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); } +} - // If the selected register aliases any other registers, we must make - // sure that one of the aliases isn't alive. - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register. - PhysRegsUsed[*AliasSet] == -2) // If allocatable. - continue; - - if (PhysRegsUsed[*AliasSet]) - spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); - } +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(LiveRegMap::iterator LRI) { + addKillFlag(LRI->second); + const LiveReg &LR = LRI->second; + assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + PhysRegState[LR.PhysReg] = regFree; + // Erase from LiveVirtRegs unless we're spilling in bulk. + if (!isBulkSpilling) + LiveVirtRegs.erase(LRI); } +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "killVirtReg needs a virtual register"); + LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + if (LRI != LiveVirtRegs.end()) + killVirtReg(LRI); +} -/// assignVirtToPhysReg - This method updates local state so that we know -/// that PhysReg is the proper container for VirtReg now. The physical -/// register must not be used for anything else when this is called. -/// -void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { - assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); - // Update information to note the fact that this register was just used, and - // it holds VirtReg. - PhysRegsUsed[PhysReg] = VirtReg; - getVirt2PhysRegMapSlot(VirtReg) = PhysReg; - UsedInInstr.set(PhysReg); +/// spillVirtReg - This method spills the value specified by VirtReg into the +/// corresponding stack slot if needed. If isKill is set, the register is also +/// killed. 
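The PhysRegState bookkeeping that killVirtReg above updates packs two kinds of values into one array: small sentinels (regDisabled, regFree, regReserved) and, for anything larger, the number of the virtual register currently occupying the physreg, with LiveVirtRegs holding the inverse mapping. A toy model of that encoding (illustrative values only; LLVM's virtual register numbers start at TargetRegisterInfo::FirstVirtualRegister):

#include <cassert>
#include <vector>

enum { regDisabled = 0, regFree = 1, regReserved = 2 };
const unsigned FirstVirt = 1024; // mirrors FirstVirtualRegister

bool holdsVirtReg(unsigned State) { return State >= FirstVirt; }

int main() {
  std::vector<unsigned> PhysRegState(16, regFree);
  PhysRegState[3] = FirstVirt + 7; // physreg 3 now holds %reg1031
  PhysRegState[4] = regReserved;   // e.g. set up for a call argument
  assert(holdsVirtReg(PhysRegState[3]));
  assert(!holdsVirtReg(PhysRegState[4]));
  return 0;
}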
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Spilling a physical register is illegal!"); + LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + spillVirtReg(MI, LRI); } +/// spillVirtReg - Do the actual work of spilling. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, + LiveRegMap::iterator LRI) { + LiveReg &LR = LRI->second; + assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + + if (LR.Dirty) { + // If this physreg is used by the instruction, we want to kill it on the + // instruction, not on the spill. + bool SpillKill = LR.LastUse != MI; + LR.Dirty = false; + DEBUG(dbgs() << "Spilling %reg" << LRI->first + << " in " << TRI->getName(LR.PhysReg)); + const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); + int FI = getStackSpaceFor(LRI->first, RC); + DEBUG(dbgs() << " to stack slot #" << FI << "\n"); + TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); + ++NumStores; // Update statistics -/// isPhysRegAvailable - Return true if the specified physical register is free -/// and available for use. This also includes checking to see if aliased -/// registers are all free... -/// -bool RAFast::isPhysRegAvailable(unsigned PhysReg) const { - if (PhysRegsUsed[PhysReg] != -1) return false; - - // If the selected register aliases any other allocated registers, it is - // not free! - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) - if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? - return false; // Can't use this reg then. - return true; + if (SpillKill) + LR.LastUse = 0; // Don't kill register again + } + killVirtReg(LRI); } -/// isPhysRegSpillable - Return true if the specified physical register can be -/// spilled for use in the current instruction. -/// -bool RAFast::isPhysRegSpillable(unsigned PhysReg) const { - // Test that PhysReg and all aliases are either free or assigned to a VirtReg - // that is not used in the instruction. - if (PhysRegsUsed[PhysReg] != -1 && - (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg))) - return false; - - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) - if (PhysRegsUsed[*AliasSet] != -1 && - (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet))) - return false; - return true; +/// spillAll - Spill all dirty virtregs without killing them. +void RAFast::spillAll(MachineInstr *MI) { + if (LiveVirtRegs.empty()) return; + isBulkSpilling = true; + // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order + // of spilling here is deterministic, if arbitrary. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); + i != e; ++i) + spillVirtReg(MI, i); + LiveVirtRegs.clear(); + isBulkSpilling = false; } +/// usePhysReg - Handle the direct use of a physical register. +/// Check that the register is not used by a virtreg. +/// Kill the physreg, marking it free. +/// This may add implicit kills to MO->getParent() and invalidate MO. 
+void RAFast::usePhysReg(MachineOperand &MO) { + unsigned PhysReg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Bad usePhysReg operand"); + + switch (PhysRegState[PhysReg]) { + case regDisabled: + break; + case regReserved: + PhysRegState[PhysReg] = regFree; + // Fall through + case regFree: + UsedInInstr.set(PhysReg); + MO.setIsKill(); + return; + default: + // The physreg was allocated to a virtual register. That means to value we + // wanted has been clobbered. + llvm_unreachable("Instruction uses an allocated register"); + } -/// getFreeReg - Look to see if there is a free register available in the -/// specified register class. If not, return 0. -/// -unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) { - // Get iterators defining the range of registers that are valid to allocate in - // this class, which also specifies the preferred allocation order. - TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); - - for (; RI != RE; ++RI) - if (isPhysRegAvailable(*RI)) { // Is reg unused? - assert(*RI != 0 && "Cannot use register!"); - return *RI; // Found an unused register! + // Maybe a superregister is reserved? + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + switch (PhysRegState[Alias]) { + case regDisabled: + break; + case regReserved: + assert(TRI->isSuperRegister(PhysReg, Alias) && + "Instruction is not using a subregister of a reserved register"); + // Leave the superregister in the working set. + PhysRegState[Alias] = regFree; + UsedInInstr.set(Alias); + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + case regFree: + if (TRI->isSuperRegister(PhysReg, Alias)) { + // Leave the superregister in the working set. + UsedInInstr.set(Alias); + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + } + // Some other alias was in the working set - clear it. + PhysRegState[Alias] = regDisabled; + break; + default: + llvm_unreachable("Instruction uses an alias of an allocated register"); } - return 0; + } + + // All aliases are disabled, bring register into working set. + PhysRegState[PhysReg] = regFree; + UsedInInstr.set(PhysReg); + MO.setIsKill(); } +/// definePhysReg - Mark PhysReg as reserved or free after spilling any +/// virtregs. This is very similar to defineVirtReg except the physreg is +/// reserved instead of allocated. +void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, + RegState NewState) { + UsedInInstr.set(PhysReg); + switch (unsigned VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + default: + spillVirtReg(MI, VirtReg); + // Fall through. + case regFree: + case regReserved: + PhysRegState[PhysReg] = NewState; + return; + } -/// getReg - Find a physical register to hold the specified virtual -/// register. If all compatible physical registers are used, this method spills -/// the last used virtual register to the stack, and uses that register. -/// -unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned VirtReg, bool NoFree) { - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + // This is a disabled register, disable all aliases. + PhysRegState[PhysReg] = NewState; + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + UsedInInstr.set(Alias); + switch (unsigned VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; + default: + spillVirtReg(MI, VirtReg); + // Fall through. 
+ case regFree:
+ case regReserved:
+ PhysRegState[Alias] = regDisabled;
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
+ break;
+ }
+ }
+}

- // First check to see if we have a free register of the requested type...
- unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
- if (PhysReg != 0) {
- // Assign the register.
- assignVirtToPhysReg(VirtReg, PhysReg);
- return PhysReg;
+// calcSpillCost - Return the cost of spilling needed to clear out PhysReg
+// and its aliases so the register is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+ if (UsedInInstr.test(PhysReg))
+ return spillImpossible;
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ return spillImpossible;
+ default:
+ return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
 }
- // If we didn't find an unused register, scavenge one now! Don't be fancy,
- // just grab the first possible register.
- TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
- TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
-
- for (; RI != RE; ++RI)
- if (isPhysRegSpillable(*RI)) {
- PhysReg = *RI;
+ // This is a disabled register, add up the cost of its aliases.
+ unsigned Cost = 0;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ if (UsedInInstr.test(Alias))
+ return spillImpossible;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regFree:
+ ++Cost;
+ break;
+ case regReserved:
+ return spillImpossible;
+ default:
+ Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
 break;
 }
-
- assert(PhysReg && "Physical register not assigned!?!?");
- spillPhysReg(MBB, I, PhysReg);
- assignVirtToPhysReg(VirtReg, PhysReg);
- return PhysReg;
+ }
+ return Cost;
 }

-/// reloadVirtReg - This method transforms the specified virtual
-/// register use to refer to a physical register. This method may do this in
-/// one of several ways: if the register is available in a physical register
-/// already, it uses that physical register. If the value is not in a physical
-/// register, and if there are physical registers available, it loads it into a
-/// register: PhysReg if that is an available physical register, otherwise any
-/// register. If register pressure is high, and it is possible, it tries to
-/// fold the load of the virtual register into the instruction itself. It
-/// avoids doing this if register pressure is low to improve the chance that
-/// subsequent instructions can use the reloaded value. This method returns
-/// the modified instruction.
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
 ///
-MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned OpNum,
- SmallSet<unsigned, 4> &ReloadedRegs,
- unsigned PhysReg) {
- unsigned VirtReg = MI->getOperand(OpNum).getReg();
-
- // If the virtual register is already available, just update the instruction
- // and return.
- if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - MI->getOperand(OpNum).setReg(PR); // Assign the input register - if (!MI->isDebugValue()) { - // Do not do these for DBG_VALUE as they can affect codegen. - UsedInInstr.set(PR); - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); +void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) { + DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to " + << TRI->getName(PhysReg) << "\n"); + PhysRegState[PhysReg] = LRE.first; + assert(!LRE.second.PhysReg && "Already assigned a physreg"); + LRE.second.PhysReg = PhysReg; +} + +/// allocVirtReg - Allocate a physical register for VirtReg. +void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { + const unsigned VirtReg = LRE.first; + + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Can only allocate virtual registers"); + + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + + // Ignore invalid hints. + if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || + !RC->contains(Hint) || !Allocatable.test(Hint))) + Hint = 0; + + // Take hint when possible. + if (Hint) { + switch(calcSpillCost(Hint)) { + default: + definePhysReg(MI, Hint, regFree); + // Fall through. + case 0: + return assignVirtToPhysReg(LRE, Hint); + case spillImpossible: + break; } - return MI; } - // Otherwise, we need to fold it into the current instruction, or reload it. - // If we have registers available to hold the value, use them. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - // If we already have a PhysReg (this happens when the instruction is a - // reg-to-reg copy with a PhysReg destination) use that. - if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || - !isPhysRegAvailable(PhysReg)) - PhysReg = getFreeReg(RC); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - - if (PhysReg) { // Register is available, allocate it! - assignVirtToPhysReg(VirtReg, PhysReg); - } else { // No registers available. - // Force some poor hapless value out of the register file to - // make room for the new register, and reload it. - PhysReg = getReg(MBB, MI, VirtReg, true); + TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF); + + // First try to find a completely free register. + for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + unsigned PhysReg = *I; + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) + return assignVirtToPhysReg(LRE, PhysReg); } - markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded + DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName() + << "\n"); + + unsigned BestReg = 0, BestCost = spillImpossible; + for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + unsigned Cost = calcSpillCost(*I); + // Cost is 0 when all aliases are already disabled. 
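+ // (Illustrative arithmetic, assuming spillClean < spillDirty: a candidate
+ // whose aliases hold one clean and one dirty virtreg costs
+ // spillClean + spillDirty, so it loses to a candidate with clean-only
+ // aliases.)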
+ if (Cost == 0) + return assignVirtToPhysReg(LRE, *I); + if (Cost < BestCost) + BestReg = *I, BestCost = Cost; + } - DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"); + if (BestReg) { + definePhysReg(MI, BestReg, regFree); + return assignVirtToPhysReg(LRE, BestReg); + } - // Add move instruction(s) - TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); - ++NumLoads; // Update statistics - - MF->getRegInfo().setPhysRegUsed(PhysReg); - MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); - - if (!ReloadedRegs.insert(PhysReg)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); + // Nothing we can do. + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for " + << "invalid constraints:\n"; + MI->print(Msg, TM); } - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (ReloadedRegs.insert(*SubRegs)) continue; - - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); + report_fatal_error(Msg.str()); +} + +/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. +RAFast::LiveRegMap::iterator +RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + LiveRegMap::iterator LRI; + bool New; + tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); + LiveReg &LR = LRI->second; + bool PartialRedef = MI->getOperand(OpNum).getSubReg(); + if (New) { + // If there is no hint, peek at the only use of this register. + if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && + MRI->hasOneNonDBGUse(VirtReg)) { + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + // It's a copy, use the destination register as a hint. + if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg), + SrcReg, DstReg, SrcSubReg, DstSubReg)) + Hint = DstReg; + } + allocVirtReg(MI, *LRI, Hint); + // If this is only a partial redefinition, we must reload the other parts. + if (PartialRedef && MI->readsVirtualRegister(VirtReg)) { + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + int FI = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << "Reloading for partial redef: %reg" << VirtReg << "\n"); + TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FI, RC, TRI); + ++NumLoads; } - report_fatal_error(Msg.str()); + } else if (LR.LastUse && !PartialRedef) { + // Redefining a live register - kill at the last use, unless it is this + // instruction defining VirtReg multiple times. + if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) + addKillFlag(LR); } - - return MI; + assert(LR.PhysReg && "Register not assigned"); + LR.LastUse = MI; + LR.LastOpNum = OpNum; + LR.Dirty = true; + UsedInInstr.set(LR.PhysReg); + return LRI; } -/// isReadModWriteImplicitKill - True if this is an implicit kill for a -/// read/mod/write register, i.e. 
update partial register.
-static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
- MO.isDef() && !MO.isDead())
- return true;
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+ LiveReg &LR = LRI->second;
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (New) {
+ allocVirtReg(MI, *LRI, Hint);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into "
+ << TRI->getName(LR.PhysReg) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+ ++NumLoads;
+ } else if (LR.Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ MO.setIsKill();
+ } else if (MO.isKill()) {
+ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ MO.setIsKill(false);
+ }
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR %x<kill>, %x
+ // This would cause a second reload of %x into a different register.
+ DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ MO.setIsKill(false);
 }
- return false;
+ assert(LR.PhysReg && "Register not assigned");
+ LR.LastUse = MI;
+ LR.LastOpNum = OpNum;
+ UsedInInstr.set(LR.PhysReg);
+ return LRI;
 }

-/// isReadModWriteImplicitDef - True if this is an implicit def for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
- !MO.isDef() && MO.isKill())
- return true;
+// setPhysReg - Change operand OpNum in MI to refer to PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ return MO.isKill() || MO.isDead();
 }
- return false;
-}
-void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) {
- // loop over each instruction
- MachineBasicBlock::iterator MII = MBB.begin();
-
- DEBUG({
- const BasicBlock *LBB = MBB.getBasicBlock();
- if (LBB)
- dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
- });
-
- // Add live-in registers as active.
- for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
- E = MBB.livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- MF->getRegInfo().setPhysRegUsed(Reg);
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
+ // Handle subregister index.
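+ // The operand names a subregister of the virtual register, so substitute
+ // the matching subregister of the assigned physreg (for instance, a
+ // sub-8-bit index into a register held in %EAX would yield %AL; x86 names
+ // purely for illustration).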
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI->addRegisterKilled(PhysReg, TRI, true);
+ return true;
 }
+ return MO.isDead();
+}
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ MachineBasicBlock::iterator MII = MBB->begin();
+
+ // Add live-in registers as live.
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ definePhysReg(MII, *I, regReserved);
+
+ SmallVector<unsigned, 8> PhysECs, VirtDead;
+ SmallVector<MachineInstr*, 32> Coalesced;
 
 // Otherwise, sequentially allocate each instruction in the MBB.
- while (MII != MBB.end()) {
+ while (MII != MBB->end()) {
 MachineInstr *MI = MII++;
 const TargetInstrDesc &TID = MI->getDesc();
 DEBUG({
- dbgs() << "\nStarting RegAlloc of: " << *MI;
- dbgs() << " Regs have values: ";
- for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
- dbgs() << "[" << TRI->getName(i)
- << ",%reg" << PhysRegsUsed[i] << "] ";
+ dbgs() << "\n>> " << *MI << "Regs:";
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << TRI->getName(Reg);
+ switch(PhysRegState[Reg]) {
+ case regFree:
+ break;
+ case regReserved:
+ dbgs() << "*";
+ break;
+ default:
+ dbgs() << "=%reg" << PhysRegState[Reg];
+ if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
+ dbgs() << "*";
+ assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
+ "Bad inverse map");
+ break;
+ }
+ }
 dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ assert(TargetRegisterInfo::isVirtualRegister(i->first) &&
+ "Bad map key");
+ assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) &&
+ "Bad map value");
+ assert(PhysRegState[i->second.PhysReg] == i->first &&
+ "Bad inverse map");
+ }
 });
 
- // Track registers used by instruction.
- UsedInInstr.reset();
-
- // Determine whether this is a copy instruction. The cases where the
- // source or destination are phys regs are handled specially.
- unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
- unsigned SrcCopyPhysReg = 0U;
- bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
- SrcCopySubReg, DstCopySubReg);
- if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
- SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
-
- // Loop over the implicit uses, making sure they don't get reallocated.
- if (TID.ImplicitUses) {
- for (const unsigned *ImplicitUses = TID.ImplicitUses;
- *ImplicitUses; ++ImplicitUses)
- UsedInInstr.set(*ImplicitUses);
- }
-
- SmallVector<unsigned, 8> Kills;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isKill()) continue;
-
- if (!MO.isImplicit())
- Kills.push_back(MO.getReg());
- else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
- // These are extra physical register kills when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- Kills.push_back(MO.getReg()); - } - - // If any physical regs are earlyclobber, spill any value they might - // have in them, then mark them unallocatable. - // If any virtual regs are earlyclobber, allocate them now (before - // freeing inputs that are killed). - if (MI->isInlineAsm()) { + // Debug values are not allowed to change codegen in any way. + if (MI->isDebugValue()) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || - !MO.getReg()) - continue; - - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) - DestPhysReg = getReg(MBB, MI, DestVirtReg); - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = - std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - MO.setReg(DestPhysReg); // Assign the earlyclobber register - } else { - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - } - } + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(MI, i, LRI->second.PhysReg); + else + MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! } + // Next instruction. + continue; } - // If a DBG_VALUE says something is located in a spilled register, - // change the DBG_VALUE to be undef, which prevents the register - // from being reloaded here. Doing that would change the generated - // code, unless another use immediately follows this instruction. - if (MI->isDebugValue() && - MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { - unsigned VirtReg = MI->getOperand(0).getReg(); - if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && - !getVirt2PhysRegMapSlot(VirtReg)) - MI->getOperand(0).setReg(0U); - } + // If this is a copy, we may be able to coalesce. + unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub; + if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) + CopySrc = CopyDst = 0; - // Get the used operands into registers. This has the potential to spill - // incoming values if we are out of registers. Note that we completely - // ignore physical register uses here. We assume that if an explicit - // physical register is referenced by the instruction, that it is guaranteed - // to be live-in, or the input is badly hosed. 
- // - SmallSet<unsigned, 4> ReloadedRegs; - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand &MO = MI->getOperand(i); - // here we are looking for only used operands (never def&use) - if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) - MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, - isCopy ? DstCopyReg : 0); - } + // Track registers used by instruction. + UsedInInstr.reset(); + PhysECs.clear(); - // If this instruction is the last user of this register, kill the - // value, freeing the register being used, so it doesn't need to be - // spilled to memory. - // - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned VirtReg = Kills[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // If the virtual register was never materialized into a register, it - // might not be in the map, but it won't hurt to zero it out anyway. - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. + // First scan. + // Mark physreg uses and early clobbers as used. + // Find the end of the virtreg operands + unsigned VirtOpEnd = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + VirtOpEnd = i+1; continue; - } else { - assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && - "Silently clearing a virtual register?"); } - - if (!PhysReg) continue; - - DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - DEBUG(dbgs() << " Last use of " - << TRI->getName(*SubRegs) << "[%reg" << VirtReg - <<"], removing it from live set\n"); - removePhysReg(*SubRegs); - } + if (!Allocatable.test(Reg)) continue; + if (MO.isUse()) { + usePhysReg(MO); + } else if (MO.isEarlyClobber()) { + definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); + PhysECs.push_back(Reg); } } - // Track registers defined by instruction. - UsedInInstr.reset(); - - // Loop over all of the operands of the instruction, spilling registers that - // are defined, and marking explicit destinations in the PhysRegsUsed map. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + // Second scan. + // Allocate virtreg uses and early clobbers. + // Collect VirtKills + for (unsigned i = 0; i != VirtOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. 
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isUse()) { + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); + unsigned PhysReg = LRI->second.PhysReg; + CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; + if (setPhysReg(MI, i, PhysReg)) + killVirtReg(LRI); + } else if (MO.isEarlyClobber()) { + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + setPhysReg(MI, i, PhysReg); + PhysECs.push_back(PhysReg); } } - // Loop over the implicit defs, spilling them as well. - if (TID.ImplicitDefs) { - for (const unsigned *ImplicitDefs = TID.ImplicitDefs; - *ImplicitDefs; ++ImplicitDefs) { - unsigned Reg = *ImplicitDefs; - if (PhysRegsUsed[Reg] != -2) { - spillPhysReg(MBB, MI, Reg, true); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - } - MF->getRegInfo().setPhysRegUsed(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; + MRI->addPhysRegsUsed(UsedInInstr); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } + // Track registers defined by instruction - early clobbers at this point. + UsedInInstr.reset(); + for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) { + unsigned PhysReg = PhysECs[i]; + UsedInInstr.set(PhysReg); + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) + UsedInInstr.set(Alias); } - SmallVector<unsigned, 8> DeadDefs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDead()) - DeadDefs.push_back(MO.getReg()); + unsigned DefOpEnd = MI->getNumOperands(); + if (TID.isCall()) { + // Spill all virtregs before a call. This serves two purposes: 1. If an + // exception is thrown, the landing pad is going to expect to find registers + // in their spill slots, and 2. we don't have to wade through all the + // <imp-def> operands on the call instruction. + DefOpEnd = VirtOpEnd; + DEBUG(dbgs() << " Spilling remaining registers before call.\n"); + spillAll(MI); } - // Okay, we have allocated all of the source operands and spilled any values - // that would be destroyed by defs of this instruction. Loop over the - // explicit defs and assign them to a register, spilling incoming values if - // we need to scavenge a register. - // - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + // Third scan. + // Allocate defs and collect dead defs. + for (unsigned i = 0; i != DefOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. 
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { - // If this is a copy try to reuse the input as the output; - // that will make the copy go away. - // If this is a copy, the source reg is a phys reg, and - // that reg is available, use that phys reg for DestPhysReg. - // If this is a copy, the source reg is a virtual reg, and - // the phys reg that was assigned to that virtual reg is now - // available, use that phys reg for DestPhysReg. (If it's now - // available that means this was the last use of the source.) - if (isCopy && - TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && - isPhysRegAvailable(SrcCopyReg)) { - DestPhysReg = SrcCopyReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else if (isCopy && - TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && - SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && - MF->getRegInfo().getRegClass(DestVirtReg)-> - contains(SrcCopyPhysReg)) { - DestPhysReg = SrcCopyPhysReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else - DestPhysReg = getReg(MBB, MI, DestVirtReg); - } - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - MO.setReg(DestPhysReg); // Assign the output register - UsedInInstr.set(DestPhysReg); - } + if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; + unsigned Reg = MO.getReg(); - // If this instruction defines any registers that are immediately dead, - // kill them now. - // - for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { - unsigned VirtReg = DeadDefs[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - assert(PhysReg != 0); - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. - continue; - } else if (!PhysReg) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!Allocatable.test(Reg)) continue; + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); continue; - - DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] != -2) { - DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"); - removePhysReg(*AliasSet); - } } + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); + unsigned PhysReg = LRI->second.PhysReg; + if (setPhysReg(MI, i, PhysReg)) { + VirtDead.push_back(Reg); + CopyDst = 0; // cancel coalescing; + } else + CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0; } - // Finally, if this is a noop copy instruction, zap it. (Except that if - // the copy is dead, it must be kept to avoid messing up liveness info for - // the register scavenger. See pr4100.) - if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg) && - SrcCopyReg == DstCopyReg && DeadDefs.empty()) - MBB.erase(MI); + // Kill dead defs after the scan to ensure that multiple defs of the same + // register are allocated identically. 
We didn't need to do this for uses
+ // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+ killVirtReg(VirtDead[i]);
+ VirtDead.clear();
+
+ MRI->addPhysRegsUsed(UsedInInstr);
+
+ if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << *MI);
+ Coalesced.push_back(MI);
+ } else {
+ DEBUG(dbgs() << "<< " << *MI);
+ }
 }
 
- MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
- // Spill all physical registers holding virtual registers now.
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
- if (unsigned VirtReg = PhysRegsUsed[i])
- spillVirtReg(MBB, MI, VirtReg, i);
- else
- removePhysReg(i);
- }
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB->getFirstTerminator());
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+ MBB->erase(Coalesced[i]);
+ NumCopies += Coalesced.size();
+
+ DEBUG(MBB->dump());
 }
 
 /// runOnMachineFunction - Register allocate the whole function
 ///
 bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
- DEBUG(dbgs() << "Machine Function\n");
+ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)Fn.getFunction())->getName() << '\n');
 MF = &Fn;
+ MRI = &MF->getRegInfo();
 TM = &Fn.getTarget();
 TRI = TM->getRegisterInfo();
 TII = TM->getInstrInfo();
- PhysRegsUsed.assign(TRI->getNumRegs(), -1);
 UsedInInstr.resize(TRI->getNumRegs());
-
- // At various places we want to efficiently check to see whether a register
- // is allocatable. To handle this, we mark all unallocatable registers as
- // being pinned down, permanently.
- {
- BitVector Allocable = TRI->getAllocatableSet(Fn);
- for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
- if (!Allocable[i])
- PhysRegsUsed[i] = -2; // Mark the reg unallocable.
- }
+ Allocatable = TRI->getAllocatableSet(*MF);
 
 // initialize the virtual->physical register map to have a 'null'
 // mapping for all virtual registers
- unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ unsigned LastVirtReg = MRI->getLastVirtReg();
 StackSlotForVirtReg.grow(LastVirtReg);
- Virt2PhysRegMap.grow(LastVirtReg);
- Virt2LastUseMap.grow(LastVirtReg);
- VirtRegModified.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
- UsedInMultipleBlocks.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
 
 // Loop over all of the basic blocks, eliminating virtual register references
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB)
- AllocateBasicBlock(*MBB);
+ for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+ MBBi != MBBe; ++MBBi) {
+ MBB = &*MBBi;
+ AllocateBasicBlock();
+ }
+
+ // Make sure the set of used physregs is closed under subreg operations.
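+ // (That is, if a register was used, its subregisters are marked used as
+ // well; the exact set depends on the target's register lattice.)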
+ MRI->closePhysRegsUsed(*TRI); StackSlotForVirtReg.clear(); - PhysRegsUsed.clear(); - VirtRegModified.clear(); - UsedInMultipleBlocks.clear(); - Virt2PhysRegMap.clear(); - Virt2LastUseMap.clear(); return true; } diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 6c8fc0c..bc331f0 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -809,7 +809,7 @@ float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, MachineInstr *MI = &*I; if (cur->liveAt(li_->getInstructionIndex(MI))) { unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); - Conflicts += powf(10.0f, (float)loopDepth); + Conflicts += std::pow(10.0f, (float)loopDepth); } } return Conflicts; diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 94456d1..321ae12 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -37,6 +37,7 @@ using namespace llvm; STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumCopies, "Number of copies coalesced"); static RegisterRegAlloc localRegAlloc("local", "local register allocator", @@ -50,6 +51,7 @@ namespace { private: const TargetMachine *TM; MachineFunction *MF; + MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; @@ -297,8 +299,18 @@ void RALocal::storeVirtReg(MachineBasicBlock &MBB, const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << " to stack slot #" << FrameIndex); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); + TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI); ++NumStores; // Update statistics + + // Mark the spill instruction as last use if we're not killing the register. + if (!isKill) { + MachineInstr *Spill = llvm::prior(I); + int OpNum = Spill->findRegisterUseOperandIdx(PhysReg); + if (OpNum < 0) + getVirtRegLastUse(VirtReg) = std::make_pair((MachineInstr*)0, 0); + else + getVirtRegLastUse(VirtReg) = std::make_pair(Spill, OpNum); + } } /// spillVirtReg - This method spills the value specified by PhysReg into the @@ -506,10 +518,15 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, SmallSet<unsigned, 4> &ReloadedRegs, unsigned PhysReg) { unsigned VirtReg = MI->getOperand(OpNum).getReg(); + unsigned SubIdx = MI->getOperand(OpNum).getSubReg(); // If the virtual register is already available, just update the instruction // and return. if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + if (SubIdx) { + PR = TRI->getSubReg(PR, SubIdx); + MI->getOperand(OpNum).setSubReg(0); + } MI->getOperand(OpNum).setReg(PR); // Assign the input register if (!MI->isDebugValue()) { // Do not do these for DBG_VALUE as they can affect codegen. @@ -543,11 +560,16 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, << TRI->getName(PhysReg) << "\n"); // Add move instruction(s) - TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); + TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI); ++NumLoads; // Update statistics MF->getRegInfo().setPhysRegUsed(PhysReg); - MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + // Assign the input register. 
+ if (SubIdx) { + MI->getOperand(OpNum).setSubReg(0); + MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx)); + } else + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); if (!ReloadedRegs.insert(PhysReg)) { @@ -626,7 +648,6 @@ static bool precedes(MachineBasicBlock::iterator A, /// ComputeLocalLiveness - Computes liveness of registers within a basic /// block, setting the killed/dead flags as appropriate. void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // Keep track of the most recently seen previous use or def of each reg, // so that we can update them with dead/kill markers. DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef; @@ -672,18 +693,26 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // - A def followed by a def is dead // - A use followed by a def is a kill if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; - + + unsigned SubIdx = MO.getSubReg(); DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator last = LastUseDef.find(MO.getReg()); if (last != LastUseDef.end()) { // Check if this is a two address instruction. If so, then // the def does not kill the use. - if (last->second.first == I && - I->isRegTiedToUseOperand(i)) + if (last->second.first == I && I->isRegTiedToUseOperand(i)) continue; MachineOperand &lastUD = last->second.first->getOperand(last->second.second); + if (SubIdx && lastUD.getSubReg() != SubIdx) + // Partial re-def, the last def is not dead. + // %reg1024:5<def> = + // %reg1024:6<def> = + // or + // %reg1024:5<def> = op %reg1024, 5 + continue; + if (lastUD.isDef()) lastUD.setIsDead(true); else @@ -732,8 +761,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that // happens. 
bool UsedByDebugValueOnly = false; - for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), - UE = MRI.reg_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), + UE = MRI->reg_end(); UI != UE; ++UI) { // Two cases: // - used in another block // - used in the same block before it is defined (loop) @@ -755,8 +784,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { } if (UsedByDebugValueOnly) - for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), - UE = MRI.reg_end(); UI != UE; ++UI) + for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), + UE = MRI->reg_end(); UI != UE; ++UI) if (UI->isDebugValue() && (UI->getParent() != &MBB || (MO.isDef() && precedes(&*UI, MI)))) @@ -828,7 +857,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; unsigned SrcCopyPhysReg = 0U; bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg); + SrcCopySubReg, DstCopySubReg) && + SrcCopySubReg == DstCopySubReg; if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); @@ -878,6 +908,10 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { std::make_pair((MachineInstr*)0, 0); DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) << " to %reg" << DestVirtReg << "\n"); + if (unsigned DestSubIdx = MO.getSubReg()) { + MO.setSubReg(0); + DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); + } MO.setReg(DestPhysReg); // Assign the earlyclobber register } else { unsigned Reg = MO.getReg(); @@ -1073,6 +1107,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) << " to %reg" << DestVirtReg << "\n"); + + if (unsigned DestSubIdx = MO.getSubReg()) { + MO.setSubReg(0); + DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); + } MO.setReg(DestPhysReg); // Assign the output register } @@ -1127,8 +1166,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // the register scavenger. See pr4100.) if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg) && - SrcCopyReg == DstCopyReg && DeadDefs.empty()) + SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg && + DeadDefs.empty()) { + ++NumCopies; MBB.erase(MI); + } } MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); @@ -1165,6 +1207,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { bool RALocal::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "Machine Function\n"); MF = &Fn; + MRI = &Fn.getRegInfo(); TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 81cfd8f..4fafd28 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -489,7 +489,7 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { // did, but none of their definitions would prevent us from coalescing. // We're good to go with the coalesce. 
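+ // The benefit below grows exponentially with loop depth, so copies inside
+ // hot loops are the strongest coalescing candidates.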
- float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0; + float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0; coalescesFound[RegPair(srcReg, dstReg)] = cBenefit; coalescesFound[RegPair(dstReg, srcReg)] = cBenefit; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 179984f..690e59f 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -343,12 +343,12 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // Spill the scavenged register before I. assert(ScavengingFrameIndex >= 0 && "Cannot scavenge register without an emergency spill slot!"); - TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); + TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI); MachineBasicBlock::iterator II = prior(I); TRI->eliminateFrameIndex(II, SPAdj, NULL, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC); + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI); II = prior(UseMI); TRI->eliminateFrameIndex(II, SPAdj, NULL, this); } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 587f001..da20c12 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -27,7 +27,6 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) : TM(mf.getTarget()), TII(TM.getInstrInfo()), TRI(TM.getRegisterInfo()), - TLI(TM.getTargetLowering()), MF(mf), MRI(mf.getRegInfo()), EntrySU(), ExitSU() { } diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 8e03420..ee08e1d 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -51,7 +51,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, } } bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second, - SU->CopyDstRC, SU->CopySrcRC); + SU->CopyDstRC, SU->CopySrcRC, + DebugLoc()); (void)Success; assert(Success && "copyRegToReg failed!"); } else { @@ -62,7 +63,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, isNew = isNew; // Silence compiler warning. 
assert(isNew && "Node emitted out of order - early"); bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(), - SU->CopyDstRC, SU->CopySrcRC); + SU->CopyDstRC, SU->CopySrcRC, + DebugLoc()); (void)Success; assert(Success && "copyRegToReg failed!"); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ca235c3..09202f8 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -210,7 +210,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); if (MO.isDef() && DanglingDebugValue[Reg].first!=0) { - SU->setDbgInstr(DanglingDebugValue[Reg].first); + SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first); DbgValueVec[DanglingDebugValue[Reg].second] = 0; DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0); } @@ -599,8 +599,8 @@ MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { } BB->insert(InsertPos, SU->getInstr()); - if (SU->getDbgInstr()) - BB->insert(InsertPos, SU->getDbgInstr()); + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) + BB->insert(InsertPos, SU->DbgInstrList[i]); } // Update the Begin iterator, as the first instruction in the block diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index d70608f..ad82db2 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -32,7 +32,7 @@ namespace llvm { /// For example, loop induction variable increments should be /// scheduled as soon as possible after the variable's last use. /// - class VISIBILITY_HIDDEN LoopDependencies { + class LLVM_LIBRARY_VISIBILITY LoopDependencies { const MachineLoopInfo &MLI; const MachineDominatorTree &MDT; @@ -94,7 +94,7 @@ namespace llvm { /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. - class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG { + class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { const MachineLoopInfo &MLI; const MachineDominatorTree &MDT; const MachineFrameInfo *MFI; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3639f80..6bddd78 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -760,12 +760,18 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { bool Replace1 = false; SDValue N1 = Op.getOperand(1); - SDValue NN1 = PromoteOperand(N1, PVT, Replace1); - if (NN1.getNode() == 0) - return SDValue(); + SDValue NN1; + if (N0 == N1) + NN1 = NN0; + else { + NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + } AddToWorkList(NN0.getNode()); - AddToWorkList(NN1.getNode()); + if (NN1.getNode()) + AddToWorkList(NN1.getNode()); if (Replace0) ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); @@ -3425,8 +3431,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); if (NarrowLoad.getNode()) { - if (NarrowLoad.getNode() != N0.getNode()) + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} @@ -3564,7 +3574,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -3585,9 +3595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), NegOne, DAG.getConstant(0, VT)); - } - - + } // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3615,8 +3623,12 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); if (NarrowLoad.getNode()) { - if (NarrowLoad.getNode() != N0.getNode()) + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); } } @@ -3726,8 +3738,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } } - // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc if (N0.getOpcode() == ISD::SETCC) { + if (!LegalOperations && VT.isVector()) { + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + // Only do this before legalize for now. + EVT N0VT = N0.getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); + SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), + DAG.getConstant(1, EltVT)); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) { + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
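+ // For example (types purely illustrative): zext of a v4i32 setcc result
+ // to v4i32 becomes (and (vsetcc ...), (build_vector 1,1,1,1)), reducing
+ // each all-ones lane to the 0/1 value zext(setcc) requires.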
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); + } else { + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), + DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + &OneOps[0], OneOps.size())); + } + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), @@ -3780,8 +3832,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); if (NarrowLoad.getNode()) { - if (NarrowLoad.getNode() != N0.getNode()) + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); } } @@ -3883,8 +3939,39 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } - // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc if (N0.getOpcode() == ISD::SETCC) { + // aext(setcc) -> sext_in_reg(vsetcc) for vectors. + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
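+ // Unlike the zext case above, no masking is needed here: any-extend leaves
+ // the high bits unspecified, so the vsetcc lanes can be used directly.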
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
 SDValue SCC =
 SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
 DAG.getConstant(1, VT), DAG.getConstant(0, VT),
@@ -5278,10 +5365,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
 SDValue Offset;
 ISD::MemIndexedMode AM = ISD::UNINDEXED;
 if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
- if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
- std::swap(BasePtr, Offset);
- if (Ptr != BasePtr)
- continue;
 // Don't create an indexed load / store with zero offset.
 if (isa<ConstantSDNode>(Offset) &&
 cast<ConstantSDNode>(Offset)->isNullValue())
@@ -5953,6 +6036,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
 SDValue InVal = N->getOperand(1);
 SDValue EltNo = N->getOperand(2);
 
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.getOpcode() == ISD::UNDEF)
+ return InVec;
+
 // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
 // vector with the inserted element.
 if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6206,7 +6293,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 // all scalar elements the same.
 if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
 SDNode *V = N0.getNode();
-
 // If this is a bit convert that changes the element type of the vector but
 // not the number of vector elements, look through it. Be careful not to
@@ -6338,13 +6424,21 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
 break;
 }
 
- Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
- EltType, LHSOp, RHSOp));
- AddToWorkList(Ops.back().getNode());
- assert((Ops.back().getOpcode() == ISD::UNDEF ||
- Ops.back().getOpcode() == ISD::Constant ||
- Ops.back().getOpcode() == ISD::ConstantFP) &&
- "Scalar binop didn't fold!");
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
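+ // (E.g. i8 elements may be carried as promoted i32 constants before type
+ // legalization; the TRUNCATE below restores the declared element type so
+ // the scalar folder sees matching operand types.)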
+ if (LHSOp.getValueType() != EltType) + LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp); + if (RHSOp.getValueType() != EltType) + RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp); + + SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType, + LHSOp, RHSOp); + if (FoldOp.getOpcode() != ISD::UNDEF && + FoldOp.getOpcode() != ISD::Constant && + FoldOp.getOpcode() != ISD::ConstantFP) + break; + Ops.push_back(FoldOp); + AddToWorkList(FoldOp.getNode()); } if (Ops.size() == LHS.getNumOperands()) { diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index b4c3833..95f4d07 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -56,6 +56,27 @@ #include "FunctionLoweringInfo.h" using namespace llvm; +bool FastISel::hasTrivialKill(const Value *V) const { + // Don't consider constants or arguments to have trivial kills. + const Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + // No-op casts are trivially coalesced by fast-isel. + if (const CastInst *Cast = dyn_cast<CastInst>(I)) + if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && + !hasTrivialKill(Cast->getOperand(0))) + return false; + + // Only instructions with a single use in the same basic block are considered + // to have trivial kills. + return I->hasOneUse() && + !(I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt || + I->getOpcode() == Instruction::IntToPtr) && + cast<Instruction>(I->use_begin())->getParent() == I->getParent(); +} + unsigned FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. @@ -78,12 +99,24 @@ unsigned FastISel::getRegForValue(const Value *V) { // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - if (ValueMap.count(V)) - return ValueMap[V]; + DenseMap<const Value *, unsigned>::iterator I = ValueMap.find(V); + if (I != ValueMap.end()) + return I->second; unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; + // In bottom-up mode, just create the virtual register which will be used + // to hold the value. It will be materialized later. + if (IsBottomUp) { + Reg = createResultReg(TLI.getRegClassFor(VT)); + if (isa<Instruction>(V)) + ValueMap[V] = Reg; + else + LocalValueMap[V] = Reg; + return Reg; + } + return materializeRegForValue(V, VT); } @@ -123,7 +156,8 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { unsigned IntegerReg = getRegForValue(ConstantInt::get(V->getContext(), IntVal)); if (IntegerReg != 0) - Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); + Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, + IntegerReg, /*Kill=*/false); } } } else if (const Operator *Op = dyn_cast<Operator>(V)) { @@ -174,25 +208,33 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { else if (Reg != AssignedReg) { const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); TII.copyRegToReg(*MBB, MBB->end(), AssignedReg, - Reg, RegClass, RegClass); + Reg, RegClass, RegClass, DL); } return AssignedReg; } -unsigned FastISel::getRegForGEPIndex(const Value *Idx) { +std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { unsigned IdxN = getRegForValue(Idx); if (IdxN == 0) // Unhandled operand. 
Halt "fast" selection and bail. - return 0; + return std::pair<unsigned, bool>(0, false); + + bool IdxNIsKill = hasTrivialKill(Idx); // If the index is smaller or larger than intptr_t, truncate or extend it. MVT PtrVT = TLI.getPointerTy(); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); - if (IdxVT.bitsLT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN); - else if (IdxVT.bitsGT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN); - return IdxN; + if (IdxVT.bitsLT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + else if (IdxVT.bitsGT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + return std::pair<unsigned, bool>(IdxN, IdxNIsKill); } /// SelectBinaryOp - Select and emit code for a binary operator instruction, @@ -224,10 +266,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Unhandled operand. Halt "fast" selection and bail. return false; + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + // Check if the second operand is a constant and handle it appropriately. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, CI->getZExtValue()); + ISDOpcode, Op0, Op0IsKill, + CI->getZExtValue()); if (ResultReg != 0) { // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); @@ -238,7 +283,7 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the second operand is a constant float. if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, CF); + ISDOpcode, Op0, Op0IsKill, CF); if (ResultReg != 0) { // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); @@ -251,9 +296,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Unhandled operand. Halt "fast" selection and bail. return false; + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + // Now we have both operands in registers. Emit the instruction. unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op1); + ISDOpcode, + Op0, Op0IsKill, + Op1, Op1IsKill); if (ResultReg == 0) // Target-specific code wasn't able to find a machine opcode for // the given ISD opcode and type. Halt "fast" selection and bail. @@ -270,6 +319,8 @@ bool FastISel::SelectGetElementPtr(const User *I) { // Unhandled operand. Halt "fast" selection and bail. return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); + const Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -282,10 +333,11 @@ bool FastISel::SelectGetElementPtr(const User *I) { uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); // FIXME: This can be optimized by combining the add with a // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. 
return false; + NIsKill = true; } Ty = StTy->getElementType(Field); } else { @@ -296,27 +348,31 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + NIsKill = true; continue; } // N = N + Idx * ElementSize; uint64_t ElementSize = TD.getTypeAllocSize(Ty); - unsigned IdxN = getRegForGEPIndex(Idx); + std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); + unsigned IdxN = Pair.first; + bool IdxNIsKill = Pair.second; if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. return false; if (ElementSize != 1) { - IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT); + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + IdxNIsKill = true; } - N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN); + N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -338,7 +394,7 @@ bool FastISel::SelectCall(const User *I) { default: break; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); - if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) || + if (!DIVariable(DI->getVariable()).Verify() || !MF.getMMI().hasDebugInfo()) return true; @@ -402,7 +458,7 @@ bool FastISel::SelectCall(const User *I) { const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC); + Reg, RC, RC, DL); assert(InsertedCopy && "Can't copy address registers!"); InsertedCopy = InsertedCopy; UpdateValueMap(I, ResultReg); @@ -432,17 +488,19 @@ bool FastISel::SelectCall(const User *I) { const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, - RC, RC); + RC, RC, DL); assert(InsertedCopy && "Can't copy address registers!"); InsertedCopy = InsertedCopy; + bool ResultRegIsKill = hasTrivialKill(I); + // Cast the register to the type of the selector. if (SrcVT.bitsGT(MVT::i32)) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg); + ResultReg, ResultRegIsKill); else if (SrcVT.bitsLT(MVT::i32)) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg); + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); if (ResultReg == 0) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -490,12 +548,15 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { // Unhandled operand. Halt "fast" selection and bail. return false; + bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); + // If the operand is i1, arrange for the high bits in the register to be zero. if (SrcVT == MVT::i1) { SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); - InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); + InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill); if (!InputReg) return false; + InputRegIsKill = true; } // If the result is i1, truncate to the target's type for i1 first. 
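// Editor's note (sketch, not patch code): FastEmitZExtFromI1, used above,
// lowers to a plain AND with 1 -- see the helper near the end of this file --
// so e.g. a register holding 0xFFFFFFFF as "true" becomes exactly 1 and a 0
// stays 0, guaranteeing the high bits are clear before the widening cast.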
if (DstVT == MVT::i1) @@ -504,7 +565,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, - InputReg); + InputReg, InputRegIsKill); if (!ResultReg) return false; @@ -536,6 +597,8 @@ bool FastISel::SelectBitCast(const User *I) { if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; @@ -545,7 +608,7 @@ bool FastISel::SelectBitCast(const User *I) { ResultReg = createResultReg(DstClass); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Op0, DstClass, SrcClass); + Op0, DstClass, SrcClass, DL); if (!InsertedCopy) ResultReg = 0; } @@ -553,7 +616,7 @@ bool FastISel::SelectBitCast(const User *I) { // If the reg-reg copy failed, select a BIT_CONVERT opcode. if (!ResultReg) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), - ISD::BIT_CONVERT, Op0); + ISD::BIT_CONVERT, Op0, Op0IsKill); if (!ResultReg) return false; @@ -609,10 +672,12 @@ FastISel::SelectFNeg(const User *I) { unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); if (OpReg == 0) return false; + bool OpRegIsKill = hasTrivialKill(I); + // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(I->getType()); unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), - ISD::FNEG, OpReg); + ISD::FNEG, OpReg, OpRegIsKill); if (ResultReg != 0) { UpdateValueMap(I, ResultReg); return true; @@ -626,18 +691,19 @@ FastISel::SelectFNeg(const User *I) { return false; unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::BIT_CONVERT, OpReg); + ISD::BIT_CONVERT, OpReg, OpRegIsKill); if (IntReg == 0) return false; - unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg, + unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, + IntReg, /*Kill=*/true, UINT64_C(1) << (VT.getSizeInBits()-1), IntVT.getSimpleVT()); if (IntResultReg == 0) return false; ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), - ISD::BIT_CONVERT, IntResultReg); + ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true); if (ResultReg == 0) return false; @@ -782,7 +848,8 @@ FastISel::FastISel(MachineFunction &mf, TM(MF.getTarget()), TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()) { + TLI(*TM.getTargetLowering()), + IsBottomUp(false) { } FastISel::~FastISel() {} @@ -793,13 +860,15 @@ unsigned FastISel::FastEmit_(MVT, MVT, } unsigned FastISel::FastEmit_r(MVT, MVT, - unsigned, unsigned /*Op0*/) { + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/) { return 0; } unsigned FastISel::FastEmit_rr(MVT, MVT, - unsigned, unsigned /*Op0*/, - unsigned /*Op0*/) { + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, + unsigned /*Op1*/, bool /*Op1IsKill*/) { return 0; } @@ -813,20 +882,23 @@ unsigned FastISel::FastEmit_f(MVT, MVT, } unsigned FastISel::FastEmit_ri(MVT, MVT, - unsigned, unsigned /*Op0*/, + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, uint64_t /*Imm*/) { return 0; } unsigned FastISel::FastEmit_rf(MVT, MVT, - unsigned, unsigned /*Op0*/, + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, const ConstantFP * /*FPImm*/) { return 0; } unsigned FastISel::FastEmit_rri(MVT, MVT, unsigned, - unsigned /*Op0*/, unsigned /*Op1*/, + unsigned /*Op0*/, bool /*Op0IsKill*/, + unsigned /*Op1*/, bool /*Op1IsKill*/, uint64_t /*Imm*/) { return 0; } @@ -836,16 +908,18 @@ unsigned 
FastISel::FastEmit_rri(MVT, MVT, /// If that fails, it materializes the immediate into a register and tries /// FastEmit_rr instead. unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, - unsigned Op0, uint64_t Imm, - MVT ImmType) { + unsigned Op0, bool Op0IsKill, + uint64_t Imm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the ri form. - unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm); + unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); if (MaterialReg == 0) return 0; - return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); } /// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries
@@ -853,10 +927,10 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and tries FastEmit_rr instead. unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, const ConstantFP *FPImm, - MVT ImmType) { + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. - unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); + unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); if (ResultReg != 0) return ResultReg;
@@ -886,11 +960,13 @@ unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, if (IntegerReg == 0) return 0; MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, - ISD::SINT_TO_FP, IntegerReg); + ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); if (MaterialReg == 0) return 0; } - return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); } unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
@@ -908,16 +984,16 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0) { + unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0); + BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0); + BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; }
@@ -927,16 +1003,21 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, unsigned Op1) { + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, -
II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -945,16 +1026,21 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, uint64_t Imm) { + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); else { - BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -963,16 +1049,21 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, const ConstantFP *FPImm) { + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); else { - BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -981,16 +1072,24 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, unsigned Op1, uint64_t Imm) { + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); else { - BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -1008,7 +1107,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, else { BuildMI(MBB, DL, II).addImm(Imm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; } @@ -1016,18 +1115,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, } unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, uint32_t Idx) { + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { const TargetRegisterClass* RC = MRI.getRegClass(Op0); unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG); if (II.getNumDefs() >= 
1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Idx); else { - BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Idx); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC); + II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) ResultReg = 0; }
@@ -1036,8 +1140,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op /// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { - return FastEmit_ri(VT, VT, ISD::AND, Op, 1); +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); } /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
@@ -1070,6 +1174,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // emitted yet. for (BasicBlock::const_iterator I = SuccBB->begin(); const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + // Ignore dead PHIs. if (PN->use_empty()) continue;
@@ -1092,12 +1197,19 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + // Set the DebugLoc for the copy. Prefer the location of the operand + // if there is one; use the location of the PHI otherwise. + DL = PN->getDebugLoc(); + if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) + DL = Inst->getDebugLoc(); + unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + DL = DebugLoc(); } }
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c5dae82..16eb8a7 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -143,7 +143,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // Create the reg, emit the copy. VRBase = MRI->createVirtualRegister(DstRC); bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, - DstRC, SrcRC); + DstRC, SrcRC, Node->getDebugLoc()); assert(Emitted && "Unable to issue a copy instruction!\n"); (void) Emitted;
@@ -265,7 +265,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const TargetInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, - bool IsDebug) { + bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Flag && "Chain and flag operands should occur at end of operand list!");
@@ -289,7 +289,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC); + DstRC, SrcRC, Op.getNode()->getDebugLoc()); assert(Emitted && "Unable to issue a copy instruction!\n"); (void) Emitted; VReg = NewVReg;
@@ -297,15 +297,25 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, // If this value has only one use, that use is a kill. This is a - // conservative approximation. Tied operands are never killed, so we need - // to check that.
And that means we need to determine the index of the - // operand. - unsigned Idx = MI->getNumOperands(); - while (Idx > 0 && - MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) - --Idx; - bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; - bool isKill = Op.hasOneUse() && !isTied && !IsDebug; + // conservative approximation. InstrEmitter does trivial coalescing + // with CopyFromReg nodes, so don't emit kill flags for them. + // Avoid kill flags on scheduler-cloned nodes, since there will be + // multiple uses. + // Tied operands are never killed, so we need to check that. And that + // means we need to determine the index of the operand. + bool isKill = Op.hasOneUse() && + Op.getNode()->getOpcode() != ISD::CopyFromReg && + !IsDebug && + !(IsClone || IsCloned); + if (isKill) { + unsigned Idx = MI->getNumOperands(); + while (Idx > 0 && + MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + if (isTied) + isKill = false; + } MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, false/*isImp*/, isKill,
@@ -322,9 +332,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const TargetInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, - bool IsDebug) { + bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { MI->addOperand(MachineOperand::CreateImm(C->getSExtValue())); } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
@@ -373,7 +384,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Flag && "Chain and flag operands should occur at end of operand list!"); - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); } }
@@ -395,7 +407,8 @@ getSuperRegisterRegClass(const TargetRegisterClass *TRC, /// EmitSubregNode - Generate machine code for subreg nodes.
/// void InstrEmitter::EmitSubregNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap){ + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { unsigned VRBase = 0; unsigned Opc = Node->getMachineOpcode();
@@ -439,7 +452,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Add def, source, and subreg index MI->addOperand(MachineOperand::CreateReg(VRBase, true)); - AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); } else if (Opc == TargetOpcode::INSERT_SUBREG ||
@@ -473,9 +487,11 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); } else - AddOperand(MI, N0, 0, 0, VRBaseMap); + AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); // Add the subregister being inserted - AddOperand(MI, N1, 0, 0, VRBaseMap); + AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); } else
@@ -503,7 +519,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, // Create the new VReg in the destination class and emit a copy. unsigned NewVReg = MRI->createVirtualRegister(DstRC); bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC); + DstRC, SrcRC, Node->getDebugLoc()); assert(Emitted && "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n"); (void) Emitted;
@@ -517,7 +533,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// void InstrEmitter::EmitRegSequence(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); unsigned NewVReg = MRI->createVirtualRegister(RC); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -528,17 +545,21 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); for (unsigned i = 0; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); -#ifndef NDEBUG if (i & 1) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = - getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); - assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE"); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); + if (!SRC) + llvm_unreachable("Invalid subregister index in REG_SEQUENCE"); + if (SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } } -#endif - AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); } MBB->insert(InsertPos, MI);
@@ -579,11 +600,17 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); // undef else AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, - true /*IsDebug*/); + /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { -
MIB.addImm(CI->getSExtValue()); + // FIXME: SDDbgValues aren't updated with legalization, so it's possible + // to have i128 values in them at this point. As a crude workaround, just + // drop the debug info if this happens. + if (!CI->getValue().isSignedIntN(64)) + MIB.addReg(0U); + else + MIB.addImm(CI->getSExtValue()); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { MIB.addFPImm(CF); } else { @@ -612,7 +639,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (Opc == TargetOpcode::EXTRACT_SUBREG || Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { - EmitSubregNode(Node, VRBaseMap); + EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); return; } @@ -624,7 +651,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Handle REG_SEQUENCE specially. if (Opc == TargetOpcode::REG_SEQUENCE) { - EmitRegSequence(Node, VRBaseMap); + EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); return; } @@ -663,7 +690,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, - VRBaseMap); + VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Transfer all of the memory reference descriptions of this instruction. MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), @@ -749,7 +776,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, Node->getOperand(1).getValueType()); bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg, - DstTRC, SrcTRC); + DstTRC, SrcTRC, Node->getDebugLoc()); assert(Emitted && "Unable to issue a copy instruction!\n"); (void) Emitted; break; @@ -810,7 +837,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (; NumVals; --NumVals, ++i) - AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); break; } } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index c7e7c71..02c044c 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -65,7 +65,7 @@ class InstrEmitter { unsigned IIOpNum, const TargetInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, - bool IsDebug = false); + bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the @@ -75,11 +75,12 @@ class InstrEmitter { unsigned IIOpNum, const TargetInstrDesc *II, DenseMap<SDValue, unsigned> &VRBaseMap, - bool IsDebug = false); + bool IsDebug, bool IsClone, bool IsCloned); /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); + void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. /// COPY_TO_REGCLASS is just a normal copy, except that the destination @@ -90,7 +91,8 @@ class InstrEmitter { /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
/// - void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned); public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bedfa57..62a37a5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -23,7 +23,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtarget.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -2027,6 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + // Code below here assumes !isSigned without checking again. // Implementation of unsigned i64 to f64 following the algorithm in // __floatundidf in compiler_rt. This implementation has the advantage @@ -2052,6 +2052,41 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } + // Implementation of unsigned i64 to f32. This implementation has the + // advantage of performing rounding correctly. + // TODO: Generalize this for use with other types. + if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + EVT SHVT = TLI.getShiftAmountTy(); + + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, + DAG.getConstant(UINT64_C(0x800), MVT::i64)); + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); + SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); + SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); + SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), + ISD::SETUGE); + SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + + SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, + DAG.getConstant(32, SHVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); + SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); + SDValue TwoP32 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64); + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); + SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); + SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); + return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, + DAG.getIntPtrConstant(0)); + + } + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), @@ -2488,6 +2523,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); + if (getTypeAction(EltVT) == Promote) + EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); unsigned NumElems = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i != NumElems; ++i) { diff 
--git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 548454c..8b382bc 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2314,13 +2314,29 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); } +static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP format"); + case MVT::f32: return &APFloat::IEEEsingle; + case MVT::f64: return &APFloat::IEEEdouble; + case MVT::f80: return &APFloat::x87DoubleExtended; + case MVT::f128: return &APFloat::IEEEquad; + case MVT::ppcf128: return &APFloat::PPCDoubleDouble; + } +} + SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ + // The following optimization is valid only if every value in SrcVT (when + // treated as signed) is representable in DstVT. Check that the mantissa + // size of DstVT is at least the number of bits in SrcVT - 1 (for example, + // i64 -> f128 qualifies because IEEEquad's 113-bit mantissa covers 63 bits, + // while i64 -> f64 does not, since f64 has only a 53-bit mantissa). + const fltSemantics *sem = EVTToAPFloatSemantics(DstVT); + if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 && + TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ // Do a signed conversion then adjust the result. SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); SignedConv = TLI.LowerOperation(SignedConv, DAG);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d60ad60..c665963 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -32,7 +32,7 @@ namespace llvm { /// involves promoting small sizes to large sizes or splitting up large values /// into small values.
/// -class VISIBILITY_HIDDEN DAGTypeLegalizer { +class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { const TargetLowering &TLI; SelectionDAG &DAG; public:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index b92a672..56f5ded 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -30,7 +30,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/Statistic.h" #include <climits> using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index da02850..820ba66 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -24,7 +24,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -53,6 +52,12 @@ static RegisterScheduler "order when possible", createSourceListDAGScheduler); +static RegisterScheduler + hybridListDAGScheduler("list-hybrid", + "Bottom-up rr list scheduling which avoids stalls for " + "long-latency instructions", + createHybridListDAGScheduler); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -64,6 +69,10 @@ private: /// it is top-down. bool isBottomUp; + /// NeedLatency - True if the scheduler will make use of latency information. + /// + bool NeedLatency; + /// AvailableQueue - The priority queue to use for the available SUnits. SchedulingPriorityQueue *AvailableQueue;
@@ -80,9 +89,9 @@ private: public: ScheduleDAGRRList(MachineFunction &mf, - bool isbottomup, + bool isbottomup, bool needlatency, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), + : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), AvailableQueue(availqueue), Topo(SUnits) { }
@@ -161,9 +170,11 @@ private: return NewNode; } - /// ForceUnitLatencies - Return true, since register-pressure-reducing - /// scheduling doesn't need actual latency information. - bool ForceUnitLatencies() const { return true; } + /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// need actual latency information but the hybrid scheduler does. + bool ForceUnitLatencies() const { + return !NeedLatency; + } }; } // end anonymous namespace
@@ -213,6 +224,12 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #endif --PredSU->NumSuccsLeft; + if (!ForceUnitLatencies()) { + // Update the predecessor's height. This is now the cycle when the + // predecessor can be scheduled without causing a pipeline stall. + PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); + } + // If all the node's successors are scheduled, this node is ready // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
@@ -244,10 +261,15 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue.
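// Editor's worked example for the ReleasePred change above (values assumed):
// if the just-scheduled node has height 4 and the edge to a predecessor
// carries latency 2, the predecessor's height is raised to at least
// 4 + 2 = 6, the earliest cycle at which it can issue without a stall.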
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); - assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); +#ifndef NDEBUG + if (CurCycle < SU->getHeight()) + DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); +#endif + + // FIXME: Handle noop hazard. SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); @@ -339,6 +361,7 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, SU->isAvailable = false; UnscheduleNodeBottomUp(OldSU); --CurCycle; + AvailableQueue->setCurCycle(CurCycle); } assert(!SU->isSucc(OldSU) && "Something is wrong!"); @@ -386,7 +409,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -504,7 +527,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. @@ -786,7 +809,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum + DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -795,7 +818,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { NewDef = Copies.back(); } - DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum + DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, @@ -821,6 +844,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { if (CurSU) ScheduleNodeBottomUp(CurSU, CurCycle); ++CurCycle; + AvailableQueue->setCurCycle(CurCycle); } // Reverse the order if it is bottom up. @@ -889,6 +913,7 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { /// schedulers. void ScheduleDAGRRList::ListScheduleTopDown() { unsigned CurCycle = 0; + AvailableQueue->setCurCycle(CurCycle); // Release any successors of the special Entry node. ReleaseSuccessors(&EntrySU); @@ -911,6 +936,7 @@ void ScheduleDAGRRList::ListScheduleTopDown() { if (CurSU) ScheduleNodeTopDown(CurSU, CurCycle); ++CurCycle; + AvailableQueue->setCurCycle(CurCycle); } #ifndef NDEBUG @@ -956,6 +982,16 @@ namespace { bool operator()(const SUnit* left, const SUnit* right) const; }; + + struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; + hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) + : SPQ(spq) {} + hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; } // end anonymous namespace /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 
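For readers unfamiliar with the heuristic named above, here is a minimal standalone model of classic Sethi-Ullman numbering. It is illustrative only -- LLVM's variant below works on SUnits and differs in details -- and every name in it is invented for the sketch:

#include <algorithm>
#include <cassert>

// The Sethi-Ullman number estimates the registers needed to evaluate a
// subtree: 1 for a leaf; for an inner node with child numbers l and r,
// max(l, r) when they differ, else l + 1.
struct Expr {
  const Expr *lhs, *rhs;
  Expr(const Expr *l = 0, const Expr *r = 0) : lhs(l), rhs(r) {}
};

static unsigned sethiUllman(const Expr *e) {
  if (!e->lhs) return 1;                    // leaf: a single register
  unsigned l = sethiUllman(e->lhs);
  unsigned r = sethiUllman(e->rhs);
  return l == r ? l + 1 : std::max(l, r);   // unequal subtrees reuse registers
}

int main() {
  Expr a, b, c, d;
  Expr ab(&a, &b), cd(&c, &d);              // each pair evaluates in 2 regs
  Expr root(&ab, &cd);
  assert(sethiUllman(&root) == 3);          // a balanced 4-leaf tree needs 3
  return 0;
}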
@@ -990,8 +1026,9 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { namespace { template<class SF> class RegReductionPriorityQueue : public SchedulingPriorityQueue { - PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; - unsigned currentQueueId; + std::vector<SUnit*> Queue; + SF Picker; + unsigned CurQueueId; protected: // SUnits - The SUnits for the current graph.
@@ -1007,7 +1044,7 @@ namespace { public: RegReductionPriorityQueue(const TargetInstrInfo *tii, const TargetRegisterInfo *tri) - : Queue(SF(this)), currentQueueId(0), + : Picker(this), CurQueueId(0), TII(tii), TRI(tri), scheduleDAG(NULL) {} void initNodes(std::vector<SUnit> &sunits) {
@@ -1067,26 +1104,26 @@ namespace { unsigned getNodeOrdering(const SUnit *SU) const { return scheduleDAG->DAG->GetOrdering(SU->getNode()); } - - unsigned size() const { return Queue.size(); } bool empty() const { return Queue.empty(); } void push(SUnit *U) { assert(!U->NodeQueueId && "Node in the queue already"); - U->NodeQueueId = ++currentQueueId; - Queue.push(U); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); } - void push_all(const std::vector<SUnit *> &Nodes) { - for (unsigned i = 0, e = Nodes.size(); i != e; ++i) - push(Nodes[i]); - } - SUnit *pop() { if (empty()) return NULL; - SUnit *V = Queue.top(); - Queue.pop(); + std::vector<SUnit *>::iterator Best = Queue.begin(); + for (std::vector<SUnit *>::iterator I = next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + Queue.pop_back(); V->NodeQueueId = 0; return V; }
@@ -1094,7 +1131,11 @@ namespace { void remove(SUnit *SU) { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); - Queue.erase_one(SU); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); SU->NodeQueueId = 0; }
@@ -1117,6 +1158,9 @@ namespace { typedef RegReductionPriorityQueue<src_ls_rr_sort> SrcRegReductionPriorityQueue; + + typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> + HybridBURRPriorityQueue; } /// closestSucc - Returns the scheduled cycle of the successor which is
@@ -1203,7 +1247,7 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { } // Source order, otherwise bottom up. -bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ +bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -1215,6 +1259,25 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ return BURRSort(left, right, SPQ); } +bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ + bool LStall = left->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < left->getHeight(); + bool RStall = right->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < right->getHeight(); + // If scheduling one of the nodes would cause a pipeline stall, delay it. + // If scheduling both would stall, sort them by height. + // If neither will cause a pipeline stall, try to reduce register pressure.
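// Editor's summary of the comparator below (illustrative). Writing L/R for
// "scheduling left/right now would stall" (a latency-sensitive node whose
// height exceeds the current cycle):
//   L && !R  -> return true: left is ranked worse, so right is picked first
//   !L && R  -> return false: left may go first
//   L && R   -> compare heights; the taller node is ranked worse (delayed)
//   !L && !R -> fall through to BURRSort's register-pressure ordering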
+ if (LStall) { + if (!RStall) + return true; + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + } else if (RStall) + return false; + return BURRSort(left, right, SPQ); +} + template<class SF> bool RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { @@ -1379,8 +1442,8 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. - DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum - << " next to PredSU # " << PredSU->NodeNum + DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum + << " next to PredSU #" << PredSU->NodeNum << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { SDep Edge = PredSU->Succs[i]; @@ -1469,7 +1532,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { - DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # " + DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/false, @@ -1563,8 +1626,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); - ScheduleDAGRRList *SD = - new ScheduleDAGRRList(*IS->MF, true, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); PQ->setScheduleDAG(SD); return SD; } @@ -1577,8 +1639,7 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); - ScheduleDAGRRList *SD = - new ScheduleDAGRRList(*IS->MF, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ); PQ->setScheduleDAG(SD); return SD; } @@ -1591,8 +1652,20 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI); - ScheduleDAGRRList *SD = - new ScheduleDAGRRList(*IS->MF, true, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); PQ->setScheduleDAG(SD); return SD; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 76e4771..3185c88 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtarget.h" #include "llvm/ADT/DenseMap.h" @@ -44,6 +45,24 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, ScheduleDAG::Run(bb, 
insertPos); } +/// NewSUnit - Creates a new SUnit and returns a ptr to it. +/// +SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { +#ifndef NDEBUG + const SUnit *Addr = 0; + if (!SUnits.empty()) + Addr = &SUnits[0]; +#endif + SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); + assert((Addr == 0 || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + SUnit *SU = &SUnits.back(); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + SU->SchedulingPref = TLI.getSchedulingPreference(N); + return SU; +} + SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode;
@@ -52,6 +71,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->SchedulingPref = Old->SchedulingPref; Old->isCloned = true; return SU; }
@@ -217,9 +237,6 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // This is a temporary workaround. SUnits.reserve(NumNodes * 2); - // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); - // Add all nodes in depth first order. SmallVector<SDNode*, 64> Worklist; SmallPtrSet<SDNode*, 64> Visited;
@@ -282,10 +299,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { N->setNodeId(NodeSUnit->NodeNum); // Assign the Latency field of NodeSUnit using target-provided information. - if (UnitLatencies) - NodeSUnit->Latency = 1; - else - ComputeLatency(NodeSUnit); + ComputeLatency(NodeSUnit); } }
@@ -353,7 +367,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpSU->Latency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep)); + ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); }
@@ -377,7 +391,17 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { } void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + // Check to see if the scheduler cares about latencies. + if (ForceUnitLatencies()) { + SU->Latency = 1; + return; + } + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + if (InstrItins.isEmpty()) { + SU->Latency = 1; + return; + } // Compute the latency for the node. We use the sum of the latencies for // all nodes flagged together into this SUnit.
@@ -389,6 +413,37 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } } +void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const{ + // Check to see if the scheduler cares about latencies.
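// Editor's worked example for the code below (itinerary values assumed): if
// the def's schedule class produces its result in cycle 3 (DefCycle = 3) and
// the use reads its operand in cycle 1 (UseCycle = 1), the dependence edge is
// assigned latency DefCycle - UseCycle + 1 = 3.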
+ if (ForceUnitLatencies()) + return; + + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + if (InstrItins.isEmpty()) + return; + + if (dep.getKind() != SDep::Data) + return; + + unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); + if (Def->isMachineOpcode() && Use->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (DefIdx >= II.getNumDefs()) + return; + int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); + if (DefCycle < 0) + return; + const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); + int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + if (UseCycle >= 0) { + int Latency = DefCycle - UseCycle + 1; + if (Latency >= 0) + dep.setLatency(Latency); + } + } +} + void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n";
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 7ae8ec2..e8714ba 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -66,18 +66,7 @@ namespace llvm { /// NewSUnit - Creates a new SUnit and returns a ptr to it. /// - SUnit *NewSUnit(SDNode *N) { -#ifndef NDEBUG - const SUnit *Addr = 0; - if (!SUnits.empty()) - Addr = &SUnits[0]; -#endif - SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && - "SUnits std::vector reallocated on the fly!"); - SUnits.back().OrigNode = &SUnits.back(); - return &SUnits.back(); - } + SUnit *NewSUnit(SDNode *N); /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states.
@@ -94,6 +83,15 @@ namespace llvm { /// virtual void ComputeLatency(SUnit *SU); + /// ComputeOperandLatency - Override dependence edge latency using + /// operand use/def information. + /// + virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const { } + + virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const; + virtual MachineBasicBlock *EmitSchedule(); /// Schedule - Order nodes according to selected style, filling
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e6df742..38bf68b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -15,6 +15,7 @@ #include "SDNodeOrdering.h" #include "SDNodeDbgValue.h" #include "llvm/Constants.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Function.h" #include "llvm/GlobalAlias.h" @@ -32,6 +33,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -789,7 +791,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli) - : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli), + : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), + FLI(fli), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); @@ -963,8 +966,18 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { EVT EltVT = VT.getScalarType(); if (EltVT==MVT::f32) return getConstantFP(APFloat((float)Val), VT, isTarget); - else + else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); + else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + bool ignored; + APFloat apf = APFloat(Val); + apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + &ignored); + return getConstantFP(apf, VT, isTarget); + } else { + assert(0 && "Unsupported type in getConstantFP"); + return SDValue(); + } } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, @@ -2614,7 +2627,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } break; case ISD::AND: - assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's // worth handling here. @@ -2627,7 +2641,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::XOR: case ISD::ADD: case ISD::SUB: - assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so // it's worth handling here. @@ -2642,7 +2657,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::SDIV: case ISD::SREM: assert(VT.isInteger() && "This operator does not apply to FP types!"); - // fall through + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -2665,6 +2682,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return N1; } } + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); break; @@ -3525,7 +3543,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. SDValue Result = - TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) @@ -3590,7 +3608,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. 
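// Editor's note (sketch): the pattern here, as in getMemcpy above and
// getMemset below, is "target hook first, generic expansion second"; the hook
// now lives on TargetSelectionDAGInfo (TSI) rather than TargetLowering (TLI),
// and a null SDValue from the hook means no target fast path exists, so the
// generic libcall or load/store lowering that follows is used instead.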
SDValue Result = - TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, + TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) return Result; @@ -3641,7 +3659,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. SDValue Result = - TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, + TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, DstSV, DstSVOff); if (Result.getNode()) return Result; @@ -5417,6 +5435,8 @@ const EVT *SDNode::getValueTypeList(EVT VT) { sys::SmartScopedLock<true> Lock(*VTMutex); return &(*EVTs->insert(VT).first); } else { + assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && + "Value type out of range!"); return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } } @@ -5607,6 +5627,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LSDAADDR: return "LSDAADDR"; case ISD::EHSELECTION: return "EHSELECTION"; case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; case ISD::ConstantPool: return "ConstantPool"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; @@ -6008,6 +6030,21 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. + if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } } void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a38b204..fbe601f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3726,6 +3726,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, return true; } +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) +#define setjmp_undefined_for_visual_studio +#undef setjmp +#endif + /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. 
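With the print_details() change above, any node whose DebugLoc is known now gets a file:line[:column] suffix when dumped. For a node originating from line 12, column 5 of foo.c, the tail of a dump line would look roughly like this (illustrative output; addresses and ID are invented):

  0x2a1b3c0: i32 = add 0x2a1b200, 0x2a1b280 [ID=7] dbg:foo.c:12:5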
@@ -3818,7 +3824,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) + if (!DIVariable(DI.getVariable()).Verify()) return 0; MDNode *Variable = DI.getVariable(); @@ -3881,7 +3887,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) + if (!DIVariable(DI.getVariable()).Verify()) return 0; MDNode *Variable = DI.getVariable(); @@ -3900,6 +3906,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, 0, false); } else { + bool createUndef = false; + // FIXME : Why not use getValue() directly ? SDValue &N = NodeMap[V]; if (N.getNode()) { if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { @@ -3907,7 +3915,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N.getResNo(), Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } - } else { + } else if (isa<PHINode>(V) && !V->use_empty()) { + SDValue N = getValue(V); + if (N.getNode()) { + if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { + SDV = DAG.getDbgValue(Variable, N.getNode(), + N.getResNo(), Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + } + } else + createUndef = true; + } else + createUndef = true; + if (createUndef) { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter; we need this fallback. SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), @@ -4018,6 +4038,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setCurrentCallSite(CI->getZExtValue()); return 0; } + case Intrinsic::eh_sjlj_setjmp: { + setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(), + getValue(I.getOperand(1)))); + return 0; + } + case Intrinsic::eh_sjlj_longjmp: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, + getRoot(), + getValue(I.getOperand(1)))); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: @@ -4924,7 +4955,7 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, namespace llvm { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. 
-class VISIBILITY_HIDDEN SDISelAsmOperandInfo : +class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 422cb7a..65b8d4f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -25,9 +25,11 @@ #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -131,11 +133,13 @@ namespace llvm { if (OptLevel == CodeGenOpt::None) return createFastDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) + if (TLI.getSchedulingPreference() == Sched::Latency) return createTDListDAGScheduler(IS, OptLevel); - assert(TLI.getSchedulingPreference() == - TargetLowering::SchedulingForRegPressure && "Unknown sched type!"); - return createBURRListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::RegPressure) + return createBURRListDAGScheduler(IS, OptLevel); + assert(TLI.getSchedulingPreference() == Sched::Hybrid && + "Unknown sched type!"); + return createHybridListDAGScheduler(IS, OptLevel); } } @@ -188,6 +192,39 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or +/// other function that gcc recognizes as "returning twice". This is used to +/// limit code-gen optimizations on the machine function. +/// +/// FIXME: Remove after <rdar://problem/8031714> is fixed. +static bool FunctionCallsSetJmp(const Function *F) { + const Module *M = F->getParent(); + static const char *ReturnsTwiceFns[] = { + "setjmp", + "sigsetjmp", + "setjmp_syscall", + "savectx", + "qsetjmp", + "vfork", + "getcontext" + }; +#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *) + + for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I) + if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) { + if (!Callee->use_empty()) + for (Value::const_use_iterator + I = Callee->use_begin(), E = Callee->use_end(); + I != E; ++I) + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (CI->getParent()->getParent() == F) + return true; + } + + return false; +#undef NUM_RETURNS_TWICE_FNS +} + bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && @@ -218,6 +255,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *EntryMBB = MF->begin(); RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII); + DenseMap<unsigned, unsigned> LiveInMap; + if (!FuncInfo->ArgDbgValues.empty()) + for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), + E = RegInfo->livein_end(); LI != E; ++LI) + if (LI->second) + LiveInMap.insert(std::make_pair(LI->first, LI->second)); + // Insert DBG_VALUE instructions for function arguments to the entry block. 
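An aside on FunctionCallsSetJmp() above: its core is a use-list walk over each candidate "returns twice" callee. Isolated into a helper, the test reads roughly as follows (a sketch against the use-iterator API of this era; isCalledFrom is a hypothetical name):

#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Support/Casting.h"

// Does F contain a direct call to Callee?
static bool isCalledFrom(const llvm::Function *Callee,
                         const llvm::Function *F) {
  for (llvm::Value::const_use_iterator I = Callee->use_begin(),
       E = Callee->use_end(); I != E; ++I)
    if (const llvm::CallInst *CI = llvm::dyn_cast<llvm::CallInst>(*I))
      if (CI->getParent()->getParent() == F)
        return true;
  return false;
}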
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; @@ -230,8 +274,44 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // FIXME: VR def may not be in entry block. Def->getParent()->insert(llvm::next(InsertPos), MI); } + + // If Reg is live-in then update debug info to track its copy in a vreg. + DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); + if (LDI != LiveInMap.end()) { + MachineInstr *Def = RegInfo->getVRegDef(LDI->second); + MachineBasicBlock::iterator InsertPos = Def; + const MDNode *Variable = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + unsigned Offset = MI->getOperand(1).getImm(); + // Def is never a terminator here, so it is ok to increment InsertPos. + BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(LDI->second, RegState::Debug) + .addImm(Offset).addMetadata(Variable); + } + } + + // Determine if there are any calls in this machine function. + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (!MFI->hasCalls()) { + for (MachineFunction::const_iterator + I = MF->begin(), E = MF->end(); I != E; ++I) { + const MachineBasicBlock *MBB = I; + for (MachineBasicBlock::const_iterator + II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { + const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); + if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) { + MFI->setHasCalls(true); + goto done; + } + } + } + done:; } + // Determine if there is a call to setjmp in the machine function. + MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn)); + // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); @@ -662,6 +742,7 @@ void SelectionDAGISel::DoInstructionSelection() { CurDAG->setRoot(Dummy.getValue()); } + DEBUG(errs() << "===== Instruction selection ends:\n"); PostprocessISelDAG(); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8a4a1b1..44a80d3 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -18,7 +18,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtarget.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -544,7 +543,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; - SchedPreferenceInfo = SchedulingForLatency; + SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; IfCvtBlockSizeLimit = 2; @@ -2417,7 +2416,7 @@ std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint[0] != '{') - return std::pair<unsigned, const TargetRegisterClass*>(0, 0); + return std::make_pair(0u, static_cast<TargetRegisterClass*>(0)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); // Remove the braces from around the name. 
@@ -2449,7 +2448,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, } } - return std::pair<unsigned, const TargetRegisterClass*>(0, 0); + return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index d20477f..a081e3c 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetSelectionDAGInfo::TargetSelectionDAGInfo() { +TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) + : TD(TM.getTargetData()) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 1f68a6f..ed3c243 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -460,7 +460,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) continue; - if (DstReg == IntB.reg) { + if (DstReg == IntB.reg && DstSubIdx == 0) { // This copy will become a noop. If it's defining a new val#, // remove that val# as well. However this live range is being // extended to the end of the existing live range defined by the copy. @@ -624,9 +624,10 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, LR->valno->addKill(LastUseIdx.getDefIndex()); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - DstReg == li.reg) { + DstReg == li.reg && DstSubIdx == 0) { // Last use is itself an identity copy. - int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_); + int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, + false, false, tri_); LastUseMI->getOperand(DeadIdx).setIsDead(); } return true; @@ -810,6 +811,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx) && + CopySrcSubIdx == 0 && + CopyDstSubIdx == 0 && CopySrcReg != CopyDstReg && CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { // If the use is a copy and it won't be coalesced away, and its source @@ -835,8 +838,13 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) UseMI->addRegisterKilled(DstReg, tri_, true); } - DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI) - << "\t" << *UseMI); + + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << li_->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); continue; } @@ -845,14 +853,21 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, // EAX: 1 -> AL, 2 -> AX // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose // sub-register 2 is also AX. + // + // FIXME: Properly compose subreg indices for all targets. 
+ // if (SubIdx && OldSubIdx && SubIdx != OldSubIdx) - assert(OldSubIdx < SubIdx && "Conflicting sub-register index!"); + ; else if (SubIdx) O.setSubReg(SubIdx); O.setReg(DstReg); - DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI) - << "\t" << *UseMI); + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << li_->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); // After updating the operand, check if the machine instruction has // become a copy. If so, update its val# information. @@ -938,7 +953,7 @@ static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, MachineInstr *DefMI = li_->getInstructionFromIndex(LRStart.getDefIndex()); if (DefMI && DefMI != CopyMI) { - int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false); + int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg); if (DeadIdx != -1) DefMI->getOperand(DeadIdx).setIsDead(); else @@ -1255,7 +1270,12 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned &RealDstReg) { const TargetRegisterClass *RC = mri_->getRegClass(SrcReg); RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC); - assert(RealDstReg && "Invalid extract_subreg instruction!"); + if (!RealDstReg) { + DEBUG(dbgs() << "\tIncompatible source regclass: " + << "none of the super-registers of " << tri_->getName(DstReg) + << " are in " << RC->getName() << ".\n"); + return false; + } LiveInterval &RHS = li_->getInterval(SrcReg); // For this type of EXTRACT_SUBREG, conservatively @@ -1293,7 +1313,12 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned &RealSrcReg) { const TargetRegisterClass *RC = mri_->getRegClass(DstReg); RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC); - assert(RealSrcReg && "Invalid extract_subreg instruction!"); + if (!RealSrcReg) { + DEBUG(dbgs() << "\tIncompatible destination regclass: " + << "none of the super-registers of " << tri_->getName(SrcReg) + << " are in " << RC->getName() << ".\n"); + return false; + } LiveInterval &LHS = li_->getInterval(DstReg); if (li_->hasInterval(RealSrcReg) && @@ -1419,7 +1444,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { assert(DstSubRC && "Illegal subregister index"); if (!DstSubRC->contains(SrcSubReg)) { DEBUG(dbgs() << "\tIncompatible destination regclass: " - << tri_->getName(SrcSubReg) << " not in " + << "none of the super-registers of " + << tri_->getName(SrcSubReg) << " are in " << DstSubRC->getName() << ".\n"); return false; // Not coalescable. } @@ -1436,7 +1462,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { assert(SrcSubRC && "Illegal subregister index"); if (!SrcSubRC->contains(DstSubReg)) { DEBUG(dbgs() << "\tIncompatible source regclass: " - << tri_->getName(DstSubReg) << " not in " + << "none of the super-registers of " + << tri_->getName(DstSubReg) << " are in " << SrcSubRC->getName() << ".\n"); (void)DstSubReg; return false; // Not coalescable. @@ -2625,7 +2652,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, MachineInstr *UseMI = Use.getParent(); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == DstReg) + SrcReg == DstReg && SrcSubIdx == DstSubIdx) // Ignore identity copies. continue; SlotIndex Idx = li_->getInstructionIndex(UseMI); @@ -2654,7 +2681,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, // Ignore identity copies. 
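For intuition on the CanJoin* changes above: getMatchingSuperReg(Reg, SubIdx, RC) returns the register in class RC whose sub-register at index SubIdx is Reg, or 0 when no register in RC covers Reg. On x86, for example, the GR32 super-register of AX at the 16-bit index is EAX, while a class containing no covering register yields 0. The patch turns that 0 into a graceful "not coalescable" answer instead of an assertion failure (sketch, reusing the surrounding variables):

unsigned RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
if (!RealDstReg)
  return false; // reject the join; the coalescer will try other strategies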
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == DstReg)) + SrcReg == DstReg && SrcSubIdx == DstSubIdx)) for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { MachineOperand &Use = MI->getOperand(i); if (Use.isReg() && Use.isUse() && Use.getReg() && @@ -2785,7 +2812,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // If the move will be an identity move delete it bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); - if (isMove && SrcReg == DstReg) { + if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) { if (li_->hasInterval(SrcReg)) { LiveInterval &RegInt = li_->getInterval(SrcReg); // If def of this move instruction is dead, remove its live range diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 63c5554..a7b2efe 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -51,6 +51,7 @@ protected: MachineFrameInfo *mfi; MachineRegisterInfo *mri; const TargetInstrInfo *tii; + const TargetRegisterInfo *tri; VirtRegMap *vrm; /// Construct a spiller base. @@ -60,6 +61,7 @@ protected: mfi = mf->getFrameInfo(); mri = &mf->getRegInfo(); tii = mf->getTarget().getInstrInfo(); + tri = mf->getTarget().getRegisterInfo(); } /// Add spill ranges for every use/def of the live interval, inserting loads @@ -129,7 +131,8 @@ protected: // Insert reload if necessary. MachineBasicBlock::iterator miItr(mi); if (hasUse) { - tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc); + tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc, + tri); MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); @@ -142,8 +145,8 @@ protected: // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, true, - ss, trc); + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, + true, ss, trc, tri); MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); @@ -333,7 +336,8 @@ private: // Insert a copy at the start of the MBB. The range proceeding the // copy will be attached to the original LiveInterval. MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); - tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc); + tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc, + DebugLoc()); MachineInstr *copyMI = defMBB->begin(); copyMI->addRegisterKilled(li->reg, tri); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); @@ -386,7 +390,8 @@ private: if (isTwoAddr && !twoAddrUseIsUndef) { MachineBasicBlock *defMBB = defInst->getParent(); - tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc); + tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc, + DebugLoc()); MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst)); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); copyMI->addRegisterKilled(li->reg, tri); @@ -446,8 +451,9 @@ private: // reg. 
MachineBasicBlock *useMBB = useInst->getParent(); MachineBasicBlock::iterator useItr(useInst); - tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc); - MachineInstr *copyMI = next(useItr); + tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc, + DebugLoc()); + MachineInstr *copyMI = llvm::next(useItr); copyMI->addRegisterKilled(newVReg, tri); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); @@ -483,7 +489,8 @@ private: assert(oldKillRange != 0 && "No kill range?"); tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(), - li->reg, newVReg, trc, trc); + li->reg, newVReg, trc, trc, + DebugLoc()); MachineInstr *copyMI = prior(killMBB->getFirstTerminator()); copyMI->addRegisterKilled(newVReg, tri); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 42dfd7f..7f3b452 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -13,6 +13,8 @@ #define DEBUG_TYPE "stackcoloring" #include "VirtRegMap.h" +#include "llvm/Function.h" +#include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -116,6 +118,7 @@ namespace { private: void InitializeSlots(); + bool CheckForSetJmpCall(const MachineFunction &MF) const; void ScanForSpillSlotRefs(MachineFunction &MF); bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); @@ -607,7 +610,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, DEBUG(MI->dump()); ++NumLoadElim; } else { - TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC); + TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC, + MI->getDebugLoc()); ++NumRegRepl; } @@ -623,7 +627,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, DEBUG(MI->dump()); ++NumStoreElim; } else { - TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC); + TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC, + MI->getDebugLoc()); ++NumRegRepl; } @@ -697,7 +702,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** Stack Slot Coloring **********\n"); + DEBUG({ + dbgs() << "********** Stack Slot Coloring **********\n" + << "********** Function: " + << MF.getFunction()->getName() << '\n'; + }); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); @@ -716,6 +725,13 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { return false; } + // If there are calls to setjmp or sigsetjmp, don't perform stack slot + // coloring. The stack could be modified before the longjmp is executed, + // resulting in the wrong value being used afterwards. (See + // <rdar://problem/8007500>.) 
+ if (MF.callsSetJmp()) + return false; + // Gather spill slot references ScanForSpillSlotRefs(MF); InitializeSlots(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index f8f6a55..142398c 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -696,7 +696,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // the Phi defining curr.second MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); TII->copyRegToReg(*PI->getParent(), PI, t, - curr.second, RC, RC); + curr.second, RC, RC, DebugLoc()); DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t << "\n"); @@ -713,7 +713,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // Insert copy from map[curr.first] to curr.second TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, - map[curr.first], RC, RC); + map[curr.first], RC, RC, DebugLoc()); map[curr.first] = curr.second; DEBUG(dbgs() << "Inserted copy from " << curr.first << " to " << curr.second << "\n"); @@ -762,7 +762,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // Insert a copy from dest to a new temporary t at the end of b unsigned t = MF->getRegInfo().createVirtualRegister(RC); TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t, - curr.second, RC, RC); + curr.second, RC, RC, DebugLoc()); map[curr.second] = t; MachineBasicBlock::iterator TI = MBB->getFirstTerminator(); @@ -961,7 +961,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first); TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(), - I->first, SI->first, RC, RC); + I->first, SI->first, RC, RC, DebugLoc()); LI.renumber(); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index aa6e2b4..f2e2a76 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -561,7 +561,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first, - CopyInfos[i].second, RC,RC); + CopyInfos[i].second, RC,RC, DebugLoc()); MachineInstr *CopyMI = prior(Loc); Copies.push_back(CopyMI); } @@ -620,7 +620,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first, - CopyInfos[i].second, RC, RC); + CopyInfos[i].second, RC, RC, DebugLoc()); MachineInstr *CopyMI = prior(Loc); Copies.push_back(CopyMI); } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 9f95993..71ad3fb 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" @@ -460,6 +461,26 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, = getContext().getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + TLSDataSection // 
.tdata + = getContext().getMachOSection("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR, + SectionKind::getDataRel()); + TLSBSSSection // .tbss + = getContext().getMachOSection("__DATA", "__thread_bss", + MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, + SectionKind::getThreadBSS()); + + // TODO: Verify datarel below. + TLSTLVSection // .tlv + = getContext().getMachOSection("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES, + SectionKind::getDataRel()); + + TLSThreadInitSection + = getContext().getMachOSection("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, + SectionKind::getDataRel()); + CStringSection // .cstring = getContext().getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, @@ -606,6 +627,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, getContext().getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + + TLSExtraDataSection = TLSTLVSection; } const MCSection *TargetLoweringObjectFileMachO:: @@ -646,7 +669,10 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); + + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; if (Kind.isText()) return GV->isWeakForLinker() ? TextCoalSection : TextSection; @@ -794,94 +820,160 @@ unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const { // COFF //===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; - -TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { - delete (COFFUniqueMapTy*)UniquingMap; -} - - -const MCSection *TargetLoweringObjectFileCOFF:: -getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new COFFUniqueMapTy(); - COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. 
- const MCSectionCOFF *&Entry = Map[Name]; - if (Entry) return Entry; - - return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); -} - void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, const TargetMachine &TM) { - if (UniquingMap != 0) - ((COFFUniqueMapTy*)UniquingMap)->clear(); TargetLoweringObjectFile::Initialize(Ctx, TM); - TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); - DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); + TextSection = + getContext().getCOFFSection(".text", + MCSectionCOFF::IMAGE_SCN_CNT_CODE | + MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getText()); + DataSection = + getContext().getCOFFSection(".data", + MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ | + MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); + ReadOnlySection = + getContext().getCOFFSection(".rdata", + MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); StaticCtorSection = - getCOFFSection(".ctors", false, SectionKind::getDataRel()); + getContext().getCOFFSection(".ctors", + MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ | + MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); StaticDtorSection = - getCOFFSection(".dtors", false, SectionKind::getDataRel()); + getContext().getCOFFSection(".dtors", + MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ | + MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); // FIXME: We're emitting LSDA info into a readonly section on COFF, even // though it contains relocatable pointers. In PIC mode, this is probably a // big runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. LSDASection = - getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); + getContext().getCOFFSection(".gcc_except_table", + MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); EHFrameSection = - getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); + getContext().getCOFFSection(".eh_frame", + MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ | + MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); // Debug info. - // FIXME: Don't use 'directive' mode here. 
DwarfAbbrevSection = - getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_abbrev", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfInfoSection = - getCOFFSection("\t.section\t.debug_info,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_info", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfLineSection = - getCOFFSection("\t.section\t.debug_line,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_line", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfFrameSection = - getCOFFSection("\t.section\t.debug_frame,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_frame", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfPubNamesSection = - getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_pubnames", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfPubTypesSection = - getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_pubtypes", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfStrSection = - getCOFFSection("\t.section\t.debug_str,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_str", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfLocSection = - getCOFFSection("\t.section\t.debug_loc,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_loc", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfARangesSection = - getCOFFSection("\t.section\t.debug_aranges,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_aranges", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfRangesSection = - getCOFFSection("\t.section\t.debug_ranges,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_ranges", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfMacroInfoSection = - getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", - true, SectionKind::getMetadata()); + getContext().getCOFFSection(".debug_macinfo", + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | + MCSectionCOFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + + DrectveSection = + getContext().getCOFFSection(".drectve", + MCSectionCOFF::IMAGE_SCN_LNK_INFO, + SectionKind::getMetadata()); +} + +static unsigned +getCOFFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (K.isMetadata()) + Flags |= + MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE; + else if (K.isText()) + Flags |= + MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE | + MCSectionCOFF::IMAGE_SCN_CNT_CODE; + else if (K.isBSS ()) + Flags |= + MCSectionCOFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ | + MCSectionCOFF::IMAGE_SCN_MEM_WRITE; + else if 
(K.isReadOnly()) + Flags |= + MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ; + else if (K.isWriteable()) + Flags |= + MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + MCSectionCOFF::IMAGE_SCN_MEM_READ | + MCSectionCOFF::IMAGE_SCN_MEM_WRITE; + + return Flags; } const MCSection *TargetLoweringObjectFileCOFF:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - return getCOFFSection(GV->getSection(), false, Kind); + return getContext().getCOFFSection(GV->getSection(), + getCOFFSectionFlags(Kind), + Kind); } static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { if (Kind.isText()) return ".text$linkonce"; + if (Kind.isBSS ()) + return ".bss$linkonce"; if (Kind.isWriteable()) return ".data$linkonce"; return ".rdata$linkonce"; @@ -900,7 +992,13 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); MCSymbol *Sym = Mang->getSymbol(GV); Name.append(Sym->getName().begin(), Sym->getName().end()); - return getCOFFSection(Name.str(), false, Kind); + + unsigned Characteristics = getCOFFSectionFlags(Kind); + + Characteristics |= MCSectionCOFF::IMAGE_SCN_LNK_COMDAT; + + return getContext().getCOFFSection(Name.str(), Characteristics, + MCSectionCOFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); } if (Kind.isText()) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index c288ae0..3d10dc1 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -40,6 +40,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" @@ -77,6 +78,10 @@ namespace { // registers from virtual registers. e.g. r1 = move v1024. DenseMap<unsigned, unsigned> DstRegMap; + /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen + /// during the initial walk of the machine function. + SmallVector<MachineInstr*, 16> RegSequences; + bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI, unsigned Reg, MachineBasicBlock::iterator OldPos); @@ -123,6 +128,13 @@ namespace { void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); + void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); + + /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part + /// of the de-ssa process. This replaces sources of REG_SEQUENCE as + /// sub-register references of the register defined by REG_SEQUENCE. + bool EliminateRegSequences(); + public: static char ID; // Pass identification, replacement for typeid TwoAddressInstructionPass() : MachineFunctionPass(&ID) {} @@ -768,7 +780,7 @@ canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, if (!LastKill) return false; - bool isModRef = LastKill->modifiesRegister(Kill); + bool isModRef = LastKill->definesRegister(Kill); NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), LastKill)); } @@ -929,6 +941,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { continue; } + // Remember REG_SEQUENCE instructions, we'll deal with them later. 
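Looking back at the getCOFFSectionFlags() helper in the COFF changes above, the SectionKind-to-characteristics mapping it computes comes out as follows for a few common kinds (sketch; flag names per the COFF spec as mirrored in MCSectionCOFF):

// getCOFFSectionFlags(SectionKind::getMetadata())
//   -> IMAGE_SCN_MEM_DISCARDABLE
// getCOFFSectionFlags(SectionKind::getText())
//   -> IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE
// getCOFFSectionFlags(SectionKind::getReadOnly())
//   -> IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ
unsigned ROFlags = getCOFFSectionFlags(SectionKind::getReadOnly());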
+ if (mi->isRegSequence()) + RegSequences.push_back(&*mi); + const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; @@ -1035,7 +1051,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { ReMatRegs.set(regB); ++NumReMats; } else { - bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc); + bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc, + mi->getDebugLoc()); (void)Emitted; assert(Emitted && "Unable to issue a copy instruction!\n"); } @@ -1110,5 +1127,211 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { VReg = ReMatRegs.find_next(VReg); } + // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve + // SSA form. It's now safe to de-SSA. + MadeChange |= EliminateRegSequences(); + return MadeChange; } + +static void UpdateRegSequenceSrcs(unsigned SrcReg, + unsigned DstReg, unsigned SubIdx, + MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), + RE = MRI->reg_end(); RI != RE; ) { + MachineOperand &MO = RI.getOperand(); + ++RI; + MO.setReg(DstReg); + assert(MO.getSubReg() == 0); + MO.setSubReg(SubIdx); + } +} + +/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are +/// EXTRACT_SUBREG from the same register and to the same virtual register +/// with different sub-register indices, attempt to combine the +/// EXTRACT_SUBREGs and pre-coalesce them. e.g. +/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0 +/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6 +/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5 +/// Since D subregs 5, 6 can combine to a Q register, we can coalesce +/// reg1026 to reg1029. +void +TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, + unsigned DstReg) { + SmallSet<unsigned, 4> Seen; + for (unsigned i = 0, e = Srcs.size(); i != e; ++i) { + unsigned SrcReg = Srcs[i]; + if (!Seen.insert(SrcReg)) + continue; + + // If there are no uses other than extract_subreg which feed into + // the reg_sequence, then we might be able to coalesce them. + bool CanCoalesce = true; + SmallVector<unsigned, 4> SubIndices; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (!UseMI->isExtractSubreg() || + UseMI->getOperand(0).getReg() != DstReg) { + CanCoalesce = false; + break; + } + SubIndices.push_back(UseMI->getOperand(2).getImm()); + } + + if (!CanCoalesce || SubIndices.size() < 2) + continue; + + std::sort(SubIndices.begin(), SubIndices.end()); + unsigned NewSubIdx = 0; + if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices, + NewSubIdx)) { + bool Proceed = true; + if (NewSubIdx) + for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), + RE = MRI->reg_end(); RI != RE; ) { + MachineOperand &MO = RI.getOperand(); + ++RI; + // FIXME: If the sub-registers do not combine to the whole + // super-register, i.e. NewSubIdx != 0, and any of the uses has a + // sub-register index, then abort the coalescing attempt. 
+ if (MO.getSubReg()) { + Proceed = false; + break; + } + MO.setReg(DstReg); + MO.setSubReg(NewSubIdx); + } + if (Proceed) + for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), + RE = MRI->reg_end(); RI != RE; ) { + MachineOperand &MO = RI.getOperand(); + ++RI; + MO.setReg(DstReg); + if (NewSubIdx) + MO.setSubReg(NewSubIdx); + } + } + } +} + +static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, + MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI != RegSeq && UseMI->isRegSequence()) + return true; + } + return false; +} + +/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part +/// of the de-ssa process. This replaces sources of REG_SEQUENCE as +/// sub-register references of the register defined by REG_SEQUENCE. e.g. +/// +/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ... +/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6 +/// => +/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ... +bool TwoAddressInstructionPass::EliminateRegSequences() { + if (RegSequences.empty()) + return false; + + for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) { + MachineInstr *MI = RegSequences[i]; + unsigned DstReg = MI->getOperand(0).getReg(); + if (MI->getOperand(0).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(DstReg) || + !(MI->getNumOperands() & 1)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); + } + + bool IsImpDef = true; + SmallVector<unsigned, 4> RealSrcs; + SmallSet<unsigned, 4> Seen; + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (MI->getOperand(i).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); + } + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isImplicitDef()) { + DefMI->eraseFromParent(); + continue; + } + IsImpDef = false; + + // Remember EXTRACT_SUBREG sources. These might be candidates for + // coalescing. + if (DefMI->isExtractSubreg()) + RealSrcs.push_back(DefMI->getOperand(1).getReg()); + + if (!Seen.insert(SrcReg) || + MI->getParent() != DefMI->getParent() || + !MI->getOperand(i).isKill() || + HasOtherRegSequenceUses(SrcReg, MI, MRI)) { + // REG_SEQUENCE cannot have duplicated operands; add a copy. + // Also add a copy if the source is live in the block. We don't want + // to end up with a partial-redef of a livein, e.g. + // BB0: + // reg1051:10<def> = + // ... + // BB1: + // ... = reg1051:10 + // BB2: + // reg1051:9<def> = + // LiveIntervalAnalysis won't like it. + // + // If the REG_SEQUENCE doesn't kill its source, keeping live variables + // correctly up to date becomes very difficult. Insert a copy. 
+ // + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + unsigned NewReg = MRI->createVirtualRegister(RC); + MachineBasicBlock::iterator InsertLoc = MI; + bool Emitted = + TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC, + MI->getDebugLoc()); + (void)Emitted; + assert(Emitted && "Unable to issue a copy instruction!\n"); + MI->getOperand(i).setReg(NewReg); + if (MI->getOperand(i).isKill()) { + MachineBasicBlock::iterator CopyMI = prior(InsertLoc); + MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg); + KillMO->setIsKill(); + if (LV) + // Update live variables + LV->replaceKillInstruction(SrcReg, MI, &*CopyMI); + } + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SubIdx = MI->getOperand(i+1).getImm(); + UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI); + } + + if (IsImpDef) { + DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) + MI->RemoveOperand(j); + } else { + DEBUG(dbgs() << "Eliminated: " << *MI); + MI->eraseFromParent(); + } + + // Try coalescing some EXTRACT_SUBREG instructions. + CoalesceExtSubRegs(RealSrcs, DstReg); + } + + RegSequences.clear(); + return true; +} diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 7f0412c..871d836 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -907,7 +907,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, TRI, VRM); } else { TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, - NewOp.StackSlotOrReMat, AliasRC); + NewOp.StackSlotOrReMat, AliasRC, TRI); MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); // Any stores to this stack slot are not dead anymore. @@ -1265,7 +1265,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS, ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF); // Load from SS to the spare physical register. - TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC); + TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI); // This invalidates Phys. Spills.ClobberPhysReg(PhysReg); // Remember it's available. @@ -1308,7 +1308,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS, } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)); // Store the value back into SS. - TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC); + TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI); MachineInstr *StoreMI = prior(NextMII); VRM->addSpillSlotUse(SS, StoreMI); VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); @@ -1523,7 +1523,7 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); // Insert new def MI and spill MI. 
const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC); + TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI); MII = prior(MII); MachineInstr *StoreMI = MII; VRM->addSpillSlotUse(SS, StoreMI); @@ -1566,7 +1566,8 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, std::vector<MachineOperand*> &KillOps) { MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC); + TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC, + TRI); MachineInstr *StoreMI = prior(oldNextMII); VRM->addSpillSlotUse(StackSlot, StoreMI); DEBUG(dbgs() << "Store:\t" << *StoreMI); @@ -1709,7 +1710,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { if (UsedSS.count(SS)) llvm_unreachable("Need to spill more than one physical registers!"); UsedSS.insert(SS); - TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC); + TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI); MachineInstr *StoreMI = prior(MII); VRM->addSpillSlotUse(SS, StoreMI); @@ -1718,7 +1719,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS, TII, *MBB->getParent()); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC); + TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI); MachineInstr *LoadMI = prior(InsertLoc); VRM->addSpillSlotUse(SS, LoadMI); @@ -1793,7 +1794,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, *MBB->getParent()); - TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC); + TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC, + MI->getDebugLoc()); // This invalidates Phys. Spills.ClobberPhysReg(Phys); @@ -1821,7 +1823,7 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM); } else { const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC); + TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI); MachineInstr *LoadMI = prior(InsertLoc); VRM->addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; @@ -1857,7 +1859,7 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) { int StackSlot = VRM->getStackSlot(VirtReg); MachineBasicBlock::iterator oldNextMII = llvm::next(MII); TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot, - RC); + RC, TRI); MachineInstr *StoreMI = prior(oldNextMII); VRM->addSpillSlotUse(StackSlot, StoreMI); DEBUG(dbgs() << "Store:\t" << *StoreMI); @@ -1893,6 +1895,11 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Clear kill info. SmallSet<unsigned, 2> KilledMIRegs; + + // Keep track of the registers we have already spilled in case there are + // multiple defs of the same register in MI. 
+ SmallSet<unsigned, 8> SpilledMIRegs; + RegKills.reset(); KillOps.clear(); KillOps.resize(TRI->getNumRegs(), NULL); @@ -2138,7 +2145,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, SSorRMId, TII, MF); - TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC); + TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC, + MI.getDebugLoc()); MachineInstr *CopyMI = prior(InsertLoc); CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); @@ -2183,7 +2191,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); } else { const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC); + TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); MachineInstr *LoadMI = prior(InsertLoc); VRM->addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; @@ -2262,7 +2270,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, DEBUG(dbgs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC); + TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC, + MI.getDebugLoc()); MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); unsigned SubIdx = DefMO->getSubReg(); // Revisit the copy so we make sure to notice the effects of the @@ -2408,6 +2417,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, } // Process all of the spilled defs. + SpilledMIRegs.clear(); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!(MO.isReg() && MO.getReg() && MO.isDef())) @@ -2421,7 +2431,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // eliminate this or else the undef marker is lost and it will // confuses the scavenger. This is extremely rare. unsigned Src, Dst, SrcSR, DstSR; - if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && + if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && + Src == Dst && SrcSR == DstSR && !MI.findRegisterUseOperand(Src)->isUndef()) { ++NumDCE; DEBUG(dbgs() << "Removing now-noop copy: " << MI); @@ -2500,7 +2511,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); - if (!MO.isDead()) { + if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) { MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true, LastStore, Spills, ReMatDefs, RegKills, KillOps); @@ -2510,7 +2521,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // instruction before considering the dest reg to be changed. 
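An aside on the SpilledMIRegs set introduced above: it prevents emitting two stores when one instruction defines the same virtual register in more than one operand. The guard pattern in isolation (sketch; spillDefsOnce and its parameters are illustrative stand-ins for the rewriter's state):

#include "llvm/ADT/SmallSet.h"
#include <vector>

static void spillDefsOnce(const std::vector<unsigned> &Defs,
                          void (*spill)(unsigned)) {
  llvm::SmallSet<unsigned, 8> Spilled;
  for (unsigned i = 0, e = Defs.size(); i != e; ++i)
    if (Spilled.insert(Defs[i])) // true only for the first occurrence
      spill(Defs[i]);            // i.e. SpillRegToStackSlot(...) above
}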
{ unsigned Src, Dst, SrcSR, DstSR; - if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { + if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && + Src == Dst && SrcSR == DstSR) { ++NumDCE; DEBUG(dbgs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp index 9d07811..5f30dce 100644 --- a/lib/CompilerDriver/Action.cpp +++ b/lib/CompilerDriver/Action.cpp @@ -33,8 +33,27 @@ extern const char* ProgramName; } namespace { - int ExecuteProgram(const std::string& name, - const StrVector& args) { + + void PrintString (const std::string& str) { + errs() << str << ' '; + } + + void PrintCommand (const std::string& Cmd, const StrVector& Args) { + errs() << Cmd << ' '; + std::for_each(Args.begin(), Args.end(), &PrintString); + errs() << '\n'; + } + + bool IsSegmentationFault (int returnCode) { +#ifdef LLVM_ON_WIN32 + return (returnCode >= 0xc0000000UL); +#else + return (returnCode < 0); +#endif + } + + int ExecuteProgram (const std::string& name, + const StrVector& args) { sys::Path prog = sys::Program::FindProgramByName(name); if (prog.isEmpty()) { @@ -67,24 +86,25 @@ namespace { argv.push_back(0); // null terminate list. // Invoke the program. - return sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]); - } + int ret = sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]); - void print_string (const std::string& str) { - errs() << str << ' '; + if (IsSegmentationFault(ret)) { + errs() << "Segmentation fault: "; + PrintCommand(name, args); + } + + return ret; } } namespace llvmc { - void AppendToGlobalTimeLog(const std::string& cmd, double time); + void AppendToGlobalTimeLog (const std::string& cmd, double time); } -int llvmc::Action::Execute() const { - if (DryRun || VerboseMode) { - errs() << Command_ << " "; - std::for_each(Args_.begin(), Args_.end(), print_string); - errs() << '\n'; - } +int llvmc::Action::Execute () const { + if (DryRun || VerboseMode) + PrintCommand(Command_, Args_); + if (!DryRun) { if (Time) { sys::TimeValue now = sys::TimeValue::now(); diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index b17827e..be7f1f5 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -715,7 +715,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Instruction::FDiv: GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break; case Instruction::FRem: - GV.FloatVal = ::fmodf(LHS.FloatVal,RHS.FloatVal); break; + GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break; } break; case Type::DoubleTyID: @@ -730,7 +730,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Instruction::FDiv: GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break; case Instruction::FRem: - GV.DoubleVal = ::fmod(LHS.DoubleVal,RHS.DoubleVal); break; + GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break; } break; case Type::X86_FP80TyID: diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index dba0e14..5e8a3b6 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -10,10 +10,13 @@ add_llvm_library(LLVMMC MCExpr.cpp MCInst.cpp MCInstPrinter.cpp + MCLabel.cpp + MCLoggingStreamer.cpp MCMachOStreamer.cpp MCNullStreamer.cpp MCObjectWriter.cpp MCSection.cpp + MCSectionCOFF.cpp MCSectionELF.cpp MCSectionMachO.cpp MCStreamer.cpp diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 2b23994..a275be2 100644 --- a/lib/MC/MCAsmInfo.cpp +++ 
b/lib/MC/MCAsmInfo.cpp @@ -21,6 +21,7 @@ using namespace llvm; MCAsmInfo::MCAsmInfo() { HasSubsectionsViaSymbols = false; HasMachoZeroFillDirective = false; + HasMachoTBSSDirective = false; HasStaticCtorDtorReferenceInStaticMode = false; MaxInstLength = 4; PCSymbol = "$"; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 3c31caa..0bd3b2d 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -35,6 +35,7 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { WeakRefDirective = "\t.weak_reference "; ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. HasMachoZeroFillDirective = true; // Uses .zerofill + HasMachoTBSSDirective = true; // Uses .tbss HasStaticCtorDtorReferenceInStaticMode = true; HiddenVisibilityAttr = MCSA_PrivateExtern; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 2c7e1c4..57b2bcc 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -109,7 +109,10 @@ public: virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); - + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); @@ -123,6 +126,9 @@ public: virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); + virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); @@ -218,6 +224,7 @@ void MCAsmStreamer::SwitchSection(const MCSection *Section) { void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(CurSection && "Cannot emit before setting section!"); OS << *Symbol << ":"; @@ -234,16 +241,11 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { } void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { - // Only absolute symbols can be redefined. - assert((Symbol->isUndefined() || Symbol->isAbsolute()) && - "Cannot define a symbol twice!"); - OS << *Symbol << " = " << *Value; EmitEOL(); // FIXME: Lift context changes into super class. - // FIXME: Set associated section. 
- Symbol->setValue(Value); + Symbol->setVariableValue(Value); } void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, @@ -297,6 +299,26 @@ void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { EmitEOL(); } +void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { + OS << "\t.def\t " << *Symbol << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) { + OS << "\t.scl\t" << StorageClass << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EmitCOFFSymbolType (int Type) { + OS << "\t.type\t" << Type << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EndCOFFSymbolDef() { + OS << "\t.endef"; + EmitEOL(); +} + void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { assert(MAI.hasDotTypeDotSizeDirective()); OS << "\t.size\t" << *Symbol << ", " << *Value << '\n'; @@ -341,6 +363,23 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, EmitEOL(); } +// .tbss sym, size, align +// This assumes that the symbol has already been mangled from the original, +// e.g. _a. +void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + assert(Symbol != NULL && "Symbol shouldn't be NULL!"); + // Instead of using the Section we'll just use the shortcut. + // This is a mach-o specific directive and section. + OS << ".tbss " << *Symbol << ", " << Size; + + // Output the alignment if we have it. The default is 1, so don't bother + // printing that. + if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment); + + EmitEOL(); +} + static inline char toOctal(int X) { return (X&7)+'0'; } static void PrintQuotedString(StringRef Data, raw_ostream &OS) { @@ -630,9 +669,11 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { AddEncodingComment(Inst); // Show the MCInst if enabled. - if (ShowInst) + if (ShowInst) { Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); - + GetCommentOS() << "\n"; + } + // If we have an AsmPrinter, use that to print, otherwise print the MCInst. if (InstPrinter) InstPrinter->printInst(&Inst, OS); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 69afcc8..5936656 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -47,93 +47,131 @@ STATISTIC(SectionLayouts, "Number of section layouts"); /* *** */ -void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { - // We shouldn't have to do anything special to support negative slides, and it - // is a perfectly valid thing to do as long as other parts of the system - // can guarantee convergence. - assert(SlideAmount >= 0 && "Negative slides not yet supported"); +MCAsmLayout::MCAsmLayout(MCAssembler &Asm) + : Assembler(Asm), LastValidFragment(0) + { + // Compute the section layout order. Virtual sections must go last. + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (!Asm.getBackend().isVirtualSection(it->getSection())) + SectionOrder.push_back(&*it); + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (Asm.getBackend().isVirtualSection(it->getSection())) + SectionOrder.push_back(&*it); +} - // Update the layout by simply recomputing the layout for the entire - // file. This is trivially correct, but very slow. - // - // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter. +bool MCAsmLayout::isSectionUpToDate(const MCSectionData *SD) const { + // The first section is always up-to-date.
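For concreteness, the new .tbss directive encodes its third operand as the log2 of the byte alignment, so an 8-byte thread-local symbol aligned to 16 bytes prints as ".tbss _tls_var, 8, 4". A minimal sketch of driving the hook added above (the wrapper function, the section argument, and the symbol name are illustrative assumptions, not part of this patch):

    // Sketch only: emit an 8-byte, 16-byte-aligned thread-local symbol through
    // the new MCStreamer hook. "_tls_var" and the TBSS section are made up.
    void emitTLV(llvm::MCStreamer &S, llvm::MCContext &Ctx,
                 const llvm::MCSection *TBSS) {
      llvm::MCSymbol *Sym = Ctx.GetOrCreateSymbol(llvm::StringRef("_tls_var"));
      S.EmitTBSSSymbol(TBSS, Sym, /*Size=*/8, /*ByteAlignment=*/16);
      // With MCAsmStreamer this prints: .tbss _tls_var, 8, 4  (4 == Log2_32(16))
    }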
+ unsigned Index = SD->getLayoutOrder(); + if (!Index) + return true; - // Layout the concrete sections and fragments. - MCAssembler &Asm = getAssembler(); - uint64_t Address = 0; - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { - // Skip virtual sections. - if (Asm.getBackend().isVirtualSection(it->getSection())) - continue; + // Otherwise, sections are always implicitly computed when the preceding + // fragment is laid out. + const MCSectionData *Prev = getSectionOrder()[Index - 1]; + return isFragmentUpToDate(&(Prev->getFragmentList().back())); +} + +bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const { + return (LastValidFragment && + F->getLayoutOrder() <= LastValidFragment->getLayoutOrder()); +} - // Layout the section fragments and its size. - Address = Asm.LayoutSection(*it, *this, Address); +void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { + // If this fragment wasn't already up-to-date, we don't need to do anything. + if (!isFragmentUpToDate(F)) + return; + + // Otherwise, reset the last valid fragment to the predecessor of the + // invalidated fragment. + LastValidFragment = F->getPrevNode(); + if (!LastValidFragment) { + unsigned Index = F->getParent()->getLayoutOrder(); + if (Index != 0) { + MCSectionData *Prev = getSectionOrder()[Index - 1]; + LastValidFragment = &(Prev->getFragmentList().back()); + } } +} - // Layout the virtual sections. - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { - if (!Asm.getBackend().isVirtualSection(it->getSection())) - continue; +void MCAsmLayout::EnsureValid(const MCFragment *F) const { + // Advance the layout position until the fragment is up-to-date. + while (!isFragmentUpToDate(F)) { + // Advance to the next fragment. + MCFragment *Cur = LastValidFragment; + if (Cur) + Cur = Cur->getNextNode(); + if (!Cur) { + unsigned NextIndex = 0; + if (LastValidFragment) + NextIndex = LastValidFragment->getParent()->getLayoutOrder() + 1; + Cur = SectionOrder[NextIndex]->begin(); + } - // Layout the section fragments and its size.
- Address = Asm.LayoutSection(*it, *this, Address); + const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur); } } +void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) { + if (LastValidFragment == Src) + LastValidFragment = Dst; + + Dst->Offset = Src->Offset; + Dst->EffectiveSize = Src->EffectiveSize; +} + uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const { assert(F->getParent() && "Missing section()!"); return getSectionAddress(F->getParent()) + getFragmentOffset(F); } uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const { + EnsureValid(F); assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!"); return F->EffectiveSize; } -void MCAsmLayout::setFragmentEffectiveSize(MCFragment *F, uint64_t Value) { - F->EffectiveSize = Value; -} - uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { + EnsureValid(F); assert(F->Offset != ~UINT64_C(0) && "Address not set!"); return F->Offset; } -void MCAsmLayout::setFragmentOffset(MCFragment *F, uint64_t Value) { - F->Offset = Value; -} - uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const { assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!"); return getFragmentAddress(SD->getFragment()) + SD->getOffset(); } uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const { + EnsureValid(SD->begin()); assert(SD->Address != ~UINT64_C(0) && "Address not set!"); return SD->Address; } -void MCAsmLayout::setSectionAddress(MCSectionData *SD, uint64_t Value) { - SD->Address = Value; -} - -uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { - assert(SD->Size != ~UINT64_C(0) && "File size not set!"); - return SD->Size; -} -void MCAsmLayout::setSectionSize(MCSectionData *SD, uint64_t Value) { - SD->Size = Value; +uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { + // The size is the last fragment's end offset. + const MCFragment &F = SD->getFragmentList().back(); + return getFragmentOffset(&F) + getFragmentEffectiveSize(&F); } uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { - assert(SD->FileSize != ~UINT64_C(0) && "File size not set!"); - return SD->FileSize; -} -void MCAsmLayout::setSectionFileSize(MCSectionData *SD, uint64_t Value) { - SD->FileSize = Value; + // Virtual sections have no file size. + if (getAssembler().getBackend().isVirtualSection(SD->getSection())) + return 0; + + // Otherwise, the file size is the same as the address space size. + return getSectionAddressSize(SD); } - /// @} +uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { + // The logical size is the address space size minus any tail padding. 
+ uint64_t Size = getSectionAddressSize(SD); + const MCAlignFragment *AF = + dyn_cast<MCAlignFragment>(&(SD->getFragmentList().back())); + if (AF && AF->hasOnlyAlignAddress()) + Size -= getFragmentEffectiveSize(AF); + + return Size; +} /* *** */ @@ -141,17 +179,12 @@ MCFragment::MCFragment() : Kind(FragmentType(~0)) { } MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) - : Kind(_Kind), - Parent(_Parent), - EffectiveSize(~UINT64_C(0)) + : Kind(_Kind), Parent(_Parent), Atom(0), EffectiveSize(~UINT64_C(0)) { if (Parent) Parent->getFragmentList().push_back(this); } -MCFragment::~MCFragment() { -} - /* *** */ MCSectionData::MCSectionData() : Section(0) {} @@ -160,8 +193,6 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) : Section(&_Section), Alignment(1), Address(~UINT64_C(0)), - Size(~UINT64_C(0)), - FileSize(~UINT64_C(0)), HasInstructions(false) { if (A) @@ -195,7 +226,7 @@ MCAssembler::~MCAssembler() { } static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, - const MCAsmFixup &Fixup, + const MCFixup &Fixup, const MCValue Target, const MCSection *BaseSection) { // The effective fixup address is @@ -233,7 +264,7 @@ static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCAsmFixup &Fixup, + const MCFixup &Fixup, const MCValue Target, const MCSymbolData *BaseSymbol) { // The effective fixup address is @@ -291,36 +322,6 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const { SD->getFragment()->getParent()->getSection()); } -// FIXME-PERF: This routine is really slow. -const MCSymbolData *MCAssembler::getAtomForAddress(const MCAsmLayout &Layout, - const MCSectionData *Section, - uint64_t Address) const { - const MCSymbolData *Best = 0; - uint64_t BestAddress = 0; - - for (MCAssembler::const_symbol_iterator it = symbol_begin(), - ie = symbol_end(); it != ie; ++it) { - // Ignore non-linker visible symbols. - if (!isSymbolLinkerVisible(it)) - continue; - - // Ignore symbols not in the same section. - if (!it->getFragment() || it->getFragment()->getParent() != Section) - continue; - - // Otherwise, find the closest symbol preceding this address (ties are - // resolved in favor of the last defined symbol). - uint64_t SymbolAddress = Layout.getSymbolAddress(it); - if (SymbolAddress <= Address && (!Best || SymbolAddress >= BestAddress)) { - Best = it; - BestAddress = SymbolAddress; - } - } - - return Best; -} - -// FIXME-PERF: This routine is really slow. const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout, const MCSymbolData *SD) const { // Linker visible symbols define atoms. @@ -331,17 +332,22 @@ const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout, if (!SD->getFragment()) return 0; - // Otherwise, search by address. - return getAtomForAddress(Layout, SD->getFragment()->getParent(), - Layout.getSymbolAddress(SD)); + // Non-linker visible symbols in sections which can't be atomized have no + // defining atom. + if (!getBackend().isSectionAtomizable( + SD->getFragment()->getParent()->getSection())) + return 0; + + // Otherwise, return the atom for the containing fragment. 
+ return SD->getFragment()->getAtom(); } bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, - const MCAsmFixup &Fixup, const MCFragment *DF, + const MCFixup &Fixup, const MCFragment *DF, MCValue &Target, uint64_t &Value) const { ++stats::EvaluateFixup; - if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout)) + if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout)) report_fatal_error("expected relocatable expression"); // FIXME: How do non-scattered symbols work in ELF? I presume the linker @@ -350,8 +356,8 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, Value = Target.getConstant(); - bool IsPCRel = - Emitter.getFixupKindInfo(Fixup.Kind).Flags & MCFixupKindInfo::FKF_IsPCRel; + bool IsPCRel = Emitter.getFixupKindInfo( + Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; bool IsResolved = true; if (const MCSymbolRefExpr *A = Target.getSymA()) { if (A->getSymbol().isDefined()) @@ -374,8 +380,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, // symbol) that the fixup value is relative to. const MCSymbolData *BaseSymbol = 0; if (IsPCRel) { - BaseSymbol = getAtomForAddress( - Layout, DF->getParent(), Layout.getFragmentAddress(DF)+Fixup.Offset); + BaseSymbol = DF->getAtom(); if (!BaseSymbol) IsResolved = false; } @@ -394,117 +399,123 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, } if (IsPCRel) - Value -= Layout.getFragmentAddress(DF) + Fixup.Offset; + Value -= Layout.getFragmentAddress(DF) + Fixup.getOffset(); return IsResolved; } -uint64_t MCAssembler::LayoutSection(MCSectionData &SD, - MCAsmLayout &Layout, - uint64_t StartAddress) { - bool IsVirtual = getBackend().isVirtualSection(SD.getSection()); - - ++stats::SectionLayouts; - - // Align this section if necessary by adding padding bytes to the previous - // section. It is safe to adjust this out-of-band, because no symbol or - // fragment is allowed to point past the end of the section at any time. - if (uint64_t Pad = OffsetToAlignment(StartAddress, SD.getAlignment())) { - // Unless this section is virtual (where we are allowed to adjust the offset - // freely), the padding goes in the previous section. - if (!IsVirtual) { - // Find the previous non-virtual section. - iterator it = &SD; - assert(it != begin() && "Invalid initial section address!"); - for (--it; getBackend().isVirtualSection(it->getSection()); --it) ; - Layout.setSectionFileSize(&*it, Layout.getSectionFileSize(&*it) + Pad); - } +uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout, + const MCFragment &F, + uint64_t SectionAddress, + uint64_t FragmentOffset) const { + switch (F.getKind()) { + case MCFragment::FT_Data: + return cast<MCDataFragment>(F).getContents().size(); + case MCFragment::FT_Fill: + return cast<MCFillFragment>(F).getSize(); + case MCFragment::FT_Inst: + return cast<MCInstFragment>(F).getInstSize(); - StartAddress += Pad; - } + case MCFragment::FT_Align: { + const MCAlignFragment &AF = cast<MCAlignFragment>(F); - // Set the aligned section address. - Layout.setSectionAddress(&SD, StartAddress); + assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) && + "Invalid OnlyAlignAddress bit, not the last fragment!"); - uint64_t Address = StartAddress; - for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) { - MCFragment &F = *it; + uint64_t Size = OffsetToAlignment(SectionAddress + FragmentOffset, + AF.getAlignment()); - ++stats::FragmentLayouts; + // Honor MaxBytesToEmit. 
+ if (Size > AF.getMaxBytesToEmit()) + return 0; - uint64_t FragmentOffset = Address - StartAddress; - Layout.setFragmentOffset(&F, FragmentOffset); + return Size; + } - // Evaluate fragment size. - uint64_t EffectiveSize = 0; - switch (F.getKind()) { - case MCFragment::FT_Align: { - MCAlignFragment &AF = cast<MCAlignFragment>(F); + case MCFragment::FT_Org: { + const MCOrgFragment &OF = cast<MCOrgFragment>(F); - EffectiveSize = OffsetToAlignment(Address, AF.getAlignment()); - if (EffectiveSize > AF.getMaxBytesToEmit()) - EffectiveSize = 0; - break; - } + // FIXME: We should compute this sooner, we don't want to recurse here, and + // we would like to be more functional. + int64_t TargetLocation; + if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) + report_fatal_error("expected assembly-time absolute expression"); - case MCFragment::FT_Data: - EffectiveSize = cast<MCDataFragment>(F).getContents().size(); - break; + // FIXME: We need a way to communicate this error. + int64_t Offset = TargetLocation - FragmentOffset; + if (Offset < 0) + report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + + "' (at offset '" + Twine(FragmentOffset) + "')"); - case MCFragment::FT_Fill: { - MCFillFragment &FF = cast<MCFillFragment>(F); - EffectiveSize = FF.getValueSize() * FF.getCount(); - break; - } + return Offset; + } + } - case MCFragment::FT_Inst: - EffectiveSize = cast<MCInstFragment>(F).getInstSize(); - break; + assert(0 && "invalid fragment kind"); + return 0; +} - case MCFragment::FT_Org: { - MCOrgFragment &OF = cast<MCOrgFragment>(F); +void MCAsmLayout::LayoutFile() { + // Initialize the first section and set the valid fragment layout point. All + // actual layout computations are done lazily. + LastValidFragment = 0; + if (!getSectionOrder().empty()) + getSectionOrder().front()->Address = 0; +} - int64_t TargetLocation; - if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) - report_fatal_error("expected assembly-time absolute expression"); +void MCAsmLayout::LayoutFragment(MCFragment *F) { + MCFragment *Prev = F->getPrevNode(); - // FIXME: We need a way to communicate this error. - int64_t Offset = TargetLocation - FragmentOffset; - if (Offset < 0) - report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + - "' (at offset '" + Twine(FragmentOffset) + "'"); + // We should never try to recompute something which is up-to-date. + assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!"); + // We should never try to compute the fragment layout if the section isn't + // up-to-date. + assert(isSectionUpToDate(F->getParent()) && + "Attempt to compute fragment before its section!"); + // We should never try to compute the fragment layout if its predecessor + // isn't up-to-date. + assert((!Prev || isFragmentUpToDate(Prev)) && + "Attempt to compute fragment before its predecessor!"); - EffectiveSize = Offset; - break; - } + ++stats::FragmentLayouts; - case MCFragment::FT_ZeroFill: { - MCZeroFillFragment &ZFF = cast<MCZeroFillFragment>(F); + // Compute the fragment start address. + uint64_t StartAddress = F->getParent()->Address; + uint64_t Address = StartAddress; + if (Prev) + Address += Prev->Offset + Prev->EffectiveSize; + + // Compute fragment offset and size. + F->Offset = Address - StartAddress; + F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress, + F->Offset); + LastValidFragment = F; + + // If this is the last fragment in a section, update the next section address.
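The lazy layout machinery above is essentially a prefix sum over fragment sizes with a high-water mark: offsets are recomputed only up to the fragment being queried, and a size change just moves the mark back. A self-contained toy model of the same idea (plain C++, not the LLVM API):

    #include <cstdint>
    #include <vector>

    struct Frag { uint64_t Offset, Size; };

    struct LazyLayout {
      std::vector<Frag> Frags; // fragments in layout order
      int LastValid;           // index of last fragment with a valid offset
      LazyLayout() : LastValid(-1) {}

      // Advance the high-water mark on demand, like EnsureValid/LayoutFragment.
      uint64_t offsetOf(int I) {
        while (LastValid < I) {
          int N = LastValid + 1;
          Frags[N].Offset = N ? Frags[N-1].Offset + Frags[N-1].Size : 0;
          LastValid = N;
        }
        return Frags[I].Offset;
      }

      // A fragment changed size: everything from it onward is stale, like
      // UpdateForSlide resetting LastValidFragment.
      void invalidateFrom(int I) { if (LastValid >= I) LastValid = I - 1; }
    };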
+ if (!F->getNextNode()) { + unsigned NextIndex = F->getParent()->getLayoutOrder() + 1; + if (NextIndex != getSectionOrder().size()) + LayoutSection(getSectionOrder()[NextIndex]); + } +} - // Align the fragment offset; it is safe to adjust the offset freely since - // this is only in virtual sections. - // - // FIXME: We shouldn't be doing this here. - Address = RoundUpToAlignment(Address, ZFF.getAlignment()); - Layout.setFragmentOffset(&F, Address - StartAddress); +void MCAsmLayout::LayoutSection(MCSectionData *SD) { + unsigned SectionOrderIndex = SD->getLayoutOrder(); - EffectiveSize = ZFF.getSize(); - break; - } - } + ++stats::SectionLayouts; - Layout.setFragmentEffectiveSize(&F, EffectiveSize); - Address += EffectiveSize; + // Compute the section start address. + uint64_t StartAddress = 0; + if (SectionOrderIndex) { + MCSectionData *Prev = getSectionOrder()[SectionOrderIndex - 1]; + StartAddress = getSectionAddress(Prev) + getSectionAddressSize(Prev); } - // Set the section sizes. - Layout.setSectionSize(&SD, Address - StartAddress); - if (IsVirtual) - Layout.setSectionFileSize(&SD, 0); - else - Layout.setSectionFileSize(&SD, Address - StartAddress); + // Honor the section alignment requirements. + StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); - return Address; + // Set the section address. + SD->Address = StartAddress; } /// WriteFragmentData - Write the \arg F data to the output file. @@ -522,6 +533,8 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, MCAlignFragment &AF = cast<MCAlignFragment>(F); uint64_t Count = FragmentSize / AF.getValueSize(); + assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!"); + // FIXME: This error shouldn't actually occur (the front end should emit // multiple .align directives to enforce the semantics it wants), but is // severe enough that we want to report it. How to handle this? @@ -535,7 +548,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, // the Count bytes. Then if that did not fill any bytes or there are any // bytes left to fill use the Value and ValueSize to fill the rest. // If we are aligning with nops, ask the target to emit the right data. - if (AF.getEmitNops()) { + if (AF.hasEmitNops()) { if (!Asm.getBackend().WriteNopData(Count, OW)) report_fatal_error("unable to write nop sequence of " + Twine(Count) + " bytes"); @@ -565,7 +578,10 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); - for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { + + assert(FF.getValueSize() && "Invalid virtual fill in concrete fragment!"); + + for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) { switch (FF.getValueSize()) { default: assert(0 && "Invalid size!"); @@ -590,11 +606,6 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, break; } - - case MCFragment::FT_ZeroFill: { - assert(0 && "Invalid zero fill fragment in concrete section!"); - break; - } } assert(OW->getStream().tell() - Start == FragmentSize); @@ -603,12 +614,27 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, void MCAssembler::WriteSectionData(const MCSectionData *SD, const MCAsmLayout &Layout, MCObjectWriter *OW) const { - uint64_t SectionSize = Layout.getSectionSize(SD); - uint64_t SectionFileSize = Layout.getSectionFileSize(SD); - // Ignore virtual sections.
if (getBackend().isVirtualSection(SD->getSection())) { - assert(SectionFileSize == 0 && "Invalid size for section!"); + assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!"); + + // Check that contents are only things legal inside a virtual section. + for (MCSectionData::const_iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) { + switch (it->getKind()) { + default: + assert(0 && "Invalid fragment in virtual section!"); + case MCFragment::FT_Align: + assert(!cast<MCAlignFragment>(it)->getValueSize() && + "Invalid align in virtual section!"); + break; + case MCFragment::FT_Fill: + assert(!cast<MCFillFragment>(it)->getValueSize() && + "Invalid fill in virtual section!"); + break; + } + } + return; } @@ -619,11 +645,7 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD, ie = SD->end(); it != ie; ++it) WriteFragmentData(*this, Layout, *it, OW); - // Add section padding. - assert(SectionFileSize >= SectionSize && "Invalid section sizes!"); - OW->WriteZeros(SectionFileSize - SectionSize); - - assert(OW->getStream().tell() - Start == SectionFileSize); + assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD)); } void MCAssembler::Finish() { @@ -631,20 +653,60 @@ void MCAssembler::Finish() { llvm::errs() << "assembler backend - pre-layout\n--\n"; dump(); }); - // Assign section and fragment ordinals, all subsequent backend code is - // responsible for updating these in place. + // Create the layout object. + MCAsmLayout Layout(*this); + + // Insert additional align fragments for concrete sections to explicitly pad + // the previous section to match their alignment requirements. This is for + // 'gas' compatibility; it shouldn't strictly be necessary. + // + // FIXME: This may be Mach-O specific. + for (unsigned i = 1, e = Layout.getSectionOrder().size(); i < e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + + // Ignore sections without alignment requirements. + unsigned Align = SD->getAlignment(); + if (Align <= 1) + continue; + + // Ignore virtual sections; they don't cause file size modifications. + if (getBackend().isVirtualSection(SD->getSection())) + continue; + + // Otherwise, create a new align fragment at the end of the previous + // section. + MCAlignFragment *AF = new MCAlignFragment(Align, 0, 1, Align, + Layout.getSectionOrder()[i - 1]); + AF->setOnlyAlignAddress(true); + } + + // Create dummy fragments and assign section ordinals. unsigned SectionIndex = 0; - unsigned FragmentIndex = 0; for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + // Create dummy fragments to eliminate any empty sections; this simplifies + // layout. + if (it->getFragmentList().empty()) { + unsigned ValueSize = 1; + if (getBackend().isVirtualSection(it->getSection())) + ValueSize = 0; + new MCFillFragment(0, ValueSize, 0, it); + } + it->setOrdinal(SectionIndex++); + } - for (MCSectionData::iterator it2 = it->begin(), - ie2 = it->end(); it2 != ie2; ++it2) - it2->setOrdinal(FragmentIndex++); + // Assign layout order indices to sections and fragments. + unsigned FragmentIndex = 0; + for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + SD->setLayoutOrder(i); + + for (MCSectionData::iterator it2 = SD->begin(), + ie2 = SD->end(); it2 != ie2; ++it2) + it2->setLayoutOrder(FragmentIndex++); } // Layout until everything fits.
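The loop that follows ("while (LayoutOnce(Layout)) continue;") is a fixed-point iteration: each pass may widen instructions, which slides everything after them and may in turn push other branch targets out of range. Because sizes only ever grow, it terminates. A toy version of one such pass, using x86-style rel8/rel32 jumps (illustrative only, not the LLVM API):

    #include <cstdlib>
    #include <vector>

    struct Branch { long Target; unsigned Size; }; // Size 2 = short, 5 = near

    static bool relaxOnce(std::vector<Branch> &Bs) {
      bool Changed = false;
      long Off = 0;
      for (size_t i = 0; i != Bs.size(); ++i) {
        Off += Bs[i].Size; // displacement is measured from the insn's end
        if (Bs[i].Size == 2 && std::labs(Bs[i].Target - Off) > 127) {
          Bs[i].Size = 5;  // jmp rel8 -> jmp rel32
          Changed = true;  // later offsets slid; another pass is needed
        }
      }
      return Changed;
    }
    // while (relaxOnce(Branches)) /* re-layout and retry until stable */;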
- MCAsmLayout Layout(*this); while (LayoutOnce(Layout)) continue; @@ -678,7 +740,7 @@ void MCAssembler::Finish() { for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(), ie3 = DF->fixup_end(); it3 != ie3; ++it3) { - MCAsmFixup &Fixup = *it3; + MCFixup &Fixup = *it3; // Evaluate the fixup. MCValue Target; @@ -702,7 +764,7 @@ void MCAssembler::Finish() { stats::ObjectBytes += OS.tell() - StartOffset; } -bool MCAssembler::FixupNeedsRelaxation(const MCAsmFixup &Fixup, +bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup, const MCFragment *DF, const MCAsmLayout &Layout) const { if (getRelaxAll()) @@ -725,7 +787,7 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF, // If this inst doesn't ever need relaxation, ignore it. This occurs when we // are intentionally pushing out inst fragments, or because we relaxed a // previous instruction to one that doesn't need relaxation. - if (!getBackend().MayNeedRelaxation(IF->getInst(), IF->getFixups())) + if (!getBackend().MayNeedRelaxation(IF->getInst())) return false; for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(), @@ -739,25 +801,8 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF, bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) { ++stats::RelaxationSteps; - // Layout the concrete sections and fragments. - uint64_t Address = 0; - for (iterator it = begin(), ie = end(); it != ie; ++it) { - // Skip virtual sections. - if (getBackend().isVirtualSection(it->getSection())) - continue; - - // Layout the section fragments and its size. - Address = LayoutSection(*it, Layout, Address); - } - - // Layout the virtual sections. - for (iterator it = begin(), ie = end(); it != ie; ++it) { - if (!getBackend().isVirtualSection(it->getSection())) - continue; - - // Layout the section fragments and its size. - Address = LayoutSection(*it, Layout, Address); - } + // Layout the sections in order. + Layout.LayoutFile(); // Scan for fragments that need relaxation. bool WasRelaxed = false; @@ -779,7 +824,7 @@ bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) { // Relax the fragment. MCInst Relaxed; - getBackend().RelaxInstruction(IF, Relaxed); + getBackend().RelaxInstruction(IF->getInst(), Relaxed); // Encode the new instruction. // @@ -796,17 +841,12 @@ bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) { IF->setInst(Relaxed); IF->getCode() = Code; IF->getFixups().clear(); - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - MCFixup &F = Fixups[i]; - IF->getFixups().push_back(MCAsmFixup(F.getOffset(), *F.getValue(), - F.getKind())); - } + // FIXME: Eliminate copy. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) + IF->getFixups().push_back(Fixups[i]); - // Update the layout, and remember that we relaxed. If we are relaxing - // everything, we can skip this step since nothing will depend on updating - // the values. - if (!getRelaxAll()) - Layout.UpdateForSlide(IF, SlideAmount); + // Update the layout, and remember that we relaxed. + Layout.UpdateForSlide(IF, SlideAmount); WasRelaxed = true; } } @@ -838,12 +878,10 @@ void MCAssembler::FinishLayout(MCAsmLayout &Layout) { SD.getFragmentList().insert(it2, DF); // Update the data fragments layout data. - // - // FIXME: Add MCAsmLayout utility for this. 
DF->setParent(IF->getParent()); - DF->setOrdinal(IF->getOrdinal()); - Layout.setFragmentOffset(DF, Layout.getFragmentOffset(IF)); - Layout.setFragmentEffectiveSize(DF, Layout.getFragmentEffectiveSize(IF)); + DF->setAtom(IF->getAtom()); + DF->setLayoutOrder(IF->getLayoutOrder()); + Layout.FragmentReplaced(IF, DF); // Copy in the data and the fixups. DF->getContents().append(IF->getCode().begin(), IF->getCode().end()); @@ -861,9 +899,10 @@ void MCAssembler::FinishLayout(MCAsmLayout &Layout) { namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) { - OS << "<MCAsmFixup" << " Offset:" << AF.Offset << " Value:" << *AF.Value - << " Kind:" << AF.Kind << ">"; +raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) { + OS << "<MCFixup" << " Offset:" << AF.getOffset() + << " Value:" << *AF.getValue() + << " Kind:" << AF.getKind() << ">"; return OS; } @@ -872,94 +911,82 @@ raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) { void MCFragment::dump() { raw_ostream &OS = llvm::errs(); - OS << "<MCFragment " << (void*) this << " Offset:" << Offset - << " EffectiveSize:" << EffectiveSize; - - OS << ">"; -} - -void MCAlignFragment::dump() { - raw_ostream &OS = llvm::errs(); + OS << "<"; + switch (getKind()) { + case MCFragment::FT_Align: OS << "MCAlignFragment"; break; + case MCFragment::FT_Data: OS << "MCDataFragment"; break; + case MCFragment::FT_Fill: OS << "MCFillFragment"; break; + case MCFragment::FT_Inst: OS << "MCInstFragment"; break; + case MCFragment::FT_Org: OS << "MCOrgFragment"; break; + } - OS << "<MCAlignFragment "; - this->MCFragment::dump(); - OS << "\n "; - OS << " Alignment:" << getAlignment() - << " Value:" << getValue() << " ValueSize:" << getValueSize() - << " MaxBytesToEmit:" << getMaxBytesToEmit() << ">"; -} + OS << " " << (void*) this << " LayoutOrder:" << LayoutOrder + << " Offset:" << Offset << " EffectiveSize:" << EffectiveSize; -void MCDataFragment::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "<MCDataFragment "; - this->MCFragment::dump(); - OS << "\n "; - OS << " Contents:["; - for (unsigned i = 0, e = getContents().size(); i != e; ++i) { - if (i) OS << ","; - OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); - } - OS << "] (" << getContents().size() << " bytes)"; - - if (!getFixups().empty()) { - OS << ",\n "; - OS << " Fixups:["; - for (fixup_iterator it = fixup_begin(), ie = fixup_end(); it != ie; ++it) { - if (it != fixup_begin()) OS << ",\n "; - OS << *it; + switch (getKind()) { + case MCFragment::FT_Align: { + const MCAlignFragment *AF = cast<MCAlignFragment>(this); + if (AF->hasEmitNops()) + OS << " (emit nops)"; + if (AF->hasOnlyAlignAddress()) + OS << " (only align section)"; + OS << "\n "; + OS << " Alignment:" << AF->getAlignment() + << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize() + << " MaxBytesToEmit:" << AF->getMaxBytesToEmit(); + break; + } + case MCFragment::FT_Data: { + const MCDataFragment *DF = cast<MCDataFragment>(this); + OS << "\n "; + OS << " Contents:["; + const SmallVectorImpl<char> &Contents = DF->getContents(); + for (unsigned i = 0, e = Contents.size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); } - OS << "]"; + OS << "] (" << Contents.size() << " bytes)"; + + if (!DF->getFixups().empty()) { + OS << ",\n "; + OS << " Fixups:["; + for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(), + ie = DF->fixup_end(); it != ie; ++it) { + if (it !=
DF->fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + break; + } + case MCFragment::FT_Fill: { + const MCFillFragment *FF = cast<MCFillFragment>(this); + OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() + << " Size:" << FF->getSize(); + break; + } + case MCFragment::FT_Inst: { + const MCInstFragment *IF = cast<MCInstFragment>(this); + OS << "\n "; + OS << " Inst:"; + IF->getInst().dump_pretty(OS); + break; + } + case MCFragment::FT_Org: { + const MCOrgFragment *OF = cast<MCOrgFragment>(this); + OS << "\n "; + OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); + break; + } } - - OS << ">"; -} - -void MCFillFragment::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "<MCFillFragment "; - this->MCFragment::dump(); - OS << "\n "; - OS << " Value:" << getValue() << " ValueSize:" << getValueSize() - << " Count:" << getCount() << ">"; -} - -void MCInstFragment::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "<MCInstFragment "; - this->MCFragment::dump(); - OS << "\n "; - OS << " Inst:"; - getInst().dump_pretty(OS); OS << ">"; } -void MCOrgFragment::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "<MCOrgFragment "; - this->MCFragment::dump(); - OS << "\n "; - OS << " Offset:" << getOffset() << " Value:" << getValue() << ">"; -} - -void MCZeroFillFragment::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "<MCZeroFillFragment "; - this->MCFragment::dump(); - OS << "\n "; - OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">"; -} - void MCSectionData::dump() { raw_ostream &OS = llvm::errs(); OS << "<MCSectionData"; OS << " Alignment:" << getAlignment() << " Address:" << Address - << " Size:" << Size << " FileSize:" << FileSize << " Fragments:[\n "; for (iterator it = begin(), ie = end(); it != ie; ++it) { if (it != begin()) OS << ",\n "; diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index dc757bb..53ffc94 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -11,18 +11,22 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCLabel.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" using namespace llvm; typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; +typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) { MachOUniquingMap = 0; ELFUniquingMap = 0; + COFFUniquingMap = 0; } MCContext::~MCContext() { @@ -32,6 +36,7 @@ MCContext::~MCContext() { // If we have the MachO uniquing map, free it. 
delete (MachOUniqueMapTy*)MachOUniquingMap; delete (ELFUniqueMapTy*)ELFUniquingMap; + delete (COFFUniqueMapTy*)COFFUniquingMap; } //===----------------------------------------------------------------------===// @@ -67,6 +72,34 @@ MCSymbol *MCContext::CreateTempSymbol() { "tmp" + Twine(NextUniqueID++)); } +unsigned MCContext::NextInstance(int64_t LocalLabelVal) { + MCLabel *&Label = Instances[LocalLabelVal]; + if (!Label) + Label = new (*this) MCLabel(0); + return Label->incInstance(); +} + +unsigned MCContext::GetInstance(int64_t LocalLabelVal) { + MCLabel *&Label = Instances[LocalLabelVal]; + if (!Label) + Label = new (*this) MCLabel(0); + return Label->getInstance(); +} + +MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + Twine(LocalLabelVal) + + "\2" + + Twine(NextInstance(LocalLabelVal))); +} +MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal, + int bORf) { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + Twine(LocalLabelVal) + + "\2" + + Twine(GetInstance(LocalLabelVal) + bORf)); +} + MCSymbol *MCContext::LookupSymbol(StringRef Name) const { return Symbols.lookup(Name); } @@ -122,4 +155,22 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags, return Result; } - +const MCSection *MCContext::getCOFFSection(StringRef Section, + unsigned Characteristics, + int Selection, + SectionKind Kind) { + if (COFFUniquingMap == 0) + COFFUniquingMap = new COFFUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(), + Characteristics, + Selection, Kind); + + Entry.setValue(Result); + return Result; +} diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index bc670ab..c000dd7 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -39,6 +39,10 @@ void MCExpr::print(raw_ostream &OS) const { const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this); const MCSymbol &Sym = SRE.getSymbol(); + if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16) + OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + // Parenthesize names that start with $ so that they don't look like // absolute names. if (Sym.getName()[0] == '$') @@ -46,7 +50,9 @@ void MCExpr::print(raw_ostream &OS) const { else OS << Sym; - if (SRE.getKind() != MCSymbolRefExpr::VK_None) + if (SRE.getKind() != MCSymbolRefExpr::VK_None && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16) OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); return; @@ -169,6 +175,9 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_PLT: return "PLT"; case VK_TLSGD: return "TLSGD"; case VK_TPOFF: return "TPOFF"; + case VK_ARM_HI16: return ":upper16:"; + case VK_ARM_LO16: return ":lower16:"; + case VK_TLVP: return "TLVP"; } } @@ -184,6 +193,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("PLT", VK_PLT) .Case("TLSGD", VK_TLSGD) .Case("TPOFF", VK_TPOFF) + .Case("TLVP", VK_TLVP) .Default(VK_Invalid); } @@ -249,7 +259,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, // Evaluate recursively if this is a variable. 
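Back in the MCContext changes above: the "\2" byte splices a per-number instance counter into the symbol name, which is what lets numeric labels like "1:" be redefined. Assuming a private-label prefix of "L" and that the bORf argument is 0 for a backward and 1 for a forward reference (a sketch; Ctx is some MCContext in scope):

    llvm::MCSymbol *Def1 = Ctx.CreateDirectionalLocalSymbol(1); // "L1\2" + "1"
    llvm::MCSymbol *Fwd  = Ctx.GetDirectionalLocalSymbol(1, 1); // "L1\2" + "2"
    llvm::MCSymbol *Def2 = Ctx.CreateDirectionalLocalSymbol(1); // "L1\2" + "2"
    // Fwd and Def2 are the same symbol: a "1f" written between the two "1:"
    // definitions resolves to the second one, while a "1b" written at the same
    // point (bORf == 0) would resolve to Def1.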
if (Sym.isVariable()) { - if (!Sym.getValue()->EvaluateAsRelocatable(Res, Layout)) + if (!Sym.getVariableValue()->EvaluateAsRelocatable(Res, Layout)) return false; // Absolutize symbol differences between defined symbols when we have a diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index de142dc..4cb628b 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -57,7 +57,7 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI, OS << Separator; getOperand(i).print(OS, MAI); } - OS << ">\n"; + OS << ">"; } void MCInst::dump() const { diff --git a/lib/MC/MCLabel.cpp b/lib/MC/MCLabel.cpp new file mode 100644 index 0000000..9c0fc92 --- /dev/null +++ b/lib/MC/MCLabel.cpp @@ -0,0 +1,21 @@ +//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCLabel.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +void MCLabel::print(raw_ostream &OS) const { + OS << '"' << getInstance() << '"'; +} + +void MCLabel::dump() const { + print(dbgs()); +} diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp new file mode 100644 index 0000000..b96040a --- /dev/null +++ b/lib/MC/MCLoggingStreamer.cpp @@ -0,0 +1,208 @@ +//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +class MCLoggingStreamer : public MCStreamer { + llvm::OwningPtr<MCStreamer> Child; + + raw_ostream &OS; + +public: + MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS) + : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {} + + void LogCall(const char *Function) { + OS << Function << "\n"; + } + + void LogCall(const char *Function, const Twine &Message) { + OS << Function << ": " << Message << "\n"; + } + + virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); } + + virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); } + + virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); } + + virtual void AddComment(const Twine &T) { + LogCall("AddComment", T); + return Child->AddComment(T); + } + + virtual void AddBlankLine() { + LogCall("AddBlankLine"); + return Child->AddBlankLine(); + } + + virtual void SwitchSection(const MCSection *Section) { + CurSection = Section; + LogCall("SwitchSection"); + return Child->SwitchSection(Section); + } + + virtual void EmitLabel(MCSymbol *Symbol) { + LogCall("EmitLabel"); + return Child->EmitLabel(Symbol); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + LogCall("EmitAssemblerFlag"); + return Child->EmitAssemblerFlag(Flag); + } + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + LogCall("EmitAssignment"); + return Child->EmitAssignment(Symbol, Value); + } + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + LogCall("EmitSymbolAttribute"); + return 
Child->EmitSymbolAttribute(Symbol, Attribute); + } + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + LogCall("EmitSymbolDesc"); + return Child->EmitSymbolDesc(Symbol, DescValue); + } + + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + LogCall("BeginCOFFSymbolDef"); + return Child->BeginCOFFSymbolDef(Symbol); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + LogCall("EmitCOFFSymbolStorageClass"); + return Child->EmitCOFFSymbolStorageClass(StorageClass); + } + + virtual void EmitCOFFSymbolType(int Type) { + LogCall("EmitCOFFSymbolType"); + return Child->EmitCOFFSymbolType(Type); + } + + virtual void EndCOFFSymbolDef() { + LogCall("EndCOFFSymbolDef"); + return Child->EndCOFFSymbolDef(); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + LogCall("EmitELFSize"); + return Child->EmitELFSize(Symbol, Value); + } + + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + LogCall("EmitCommonSymbol"); + return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + LogCall("EmitLocalCommonSymbol"); + return Child->EmitLocalCommonSymbol(Symbol, Size); + } + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + LogCall("EmitZerofill"); + return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment); + } + + virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + LogCall("EmitTBSSSymbol"); + return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment); + } + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + LogCall("EmitBytes"); + return Child->EmitBytes(Data, AddrSpace); + } + + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace){ + LogCall("EmitValue"); + return Child->EmitValue(Value, Size, AddrSpace); + } + + virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace) { + LogCall("EmitIntValue"); + return Child->EmitIntValue(Value, Size, AddrSpace); + } + + virtual void EmitGPRel32Value(const MCExpr *Value) { + LogCall("EmitGPRel32Value"); + return Child->EmitGPRel32Value(Value); + } + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + LogCall("EmitFill"); + return Child->EmitFill(NumBytes, FillValue, AddrSpace); + } + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0) { + LogCall("EmitValueToAlignment"); + return Child->EmitValueToAlignment(ByteAlignment, Value, + ValueSize, MaxBytesToEmit); + } + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) { + LogCall("EmitCodeAlignment"); + return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit); + } + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) { + LogCall("EmitValueToOffset"); + return Child->EmitValueToOffset(Offset, Value); + } + + virtual void EmitFileDirective(StringRef Filename) { + LogCall("EmitFileDirective", "FileName:" + Filename); + return Child->EmitFileDirective(Filename); + } + + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + LogCall("EmitDwarfFileDirective", + "FileNo:" + Twine(FileNo) + " Filename:" + Filename); + return Child->EmitDwarfFileDirective(FileNo, Filename); + } + + virtual void EmitInstruction(const 
MCInst &Inst) { + LogCall("EmitInstruction"); + return Child->EmitInstruction(Inst); + } + + virtual void EmitRawText(StringRef String) { + LogCall("EmitRawText", "\"" + String + "\""); + return Child->EmitRawText(String); + } + + virtual void Finish() { + LogCall("Finish"); + return Child->Finish(); + } + +}; + +} // end anonymous namespace. + +MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) { + return new MCLoggingStreamer(Child, OS); +} diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 120f837..27e4e98 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetAsmBackend.h" @@ -25,30 +26,14 @@ using namespace llvm; namespace { class MCMachOStreamer : public MCStreamer { - /// SymbolFlags - We store the value for the 'desc' symbol field in the lowest - /// 16 bits of the implementation defined flags. - enum SymbolFlags { // See <mach-o/nlist.h>. - SF_DescFlagsMask = 0xFFFF, - - // Reference type flags. - SF_ReferenceTypeMask = 0x0007, - SF_ReferenceTypeUndefinedNonLazy = 0x0000, - SF_ReferenceTypeUndefinedLazy = 0x0001, - SF_ReferenceTypeDefined = 0x0002, - SF_ReferenceTypePrivateDefined = 0x0003, - SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004, - SF_ReferenceTypePrivateUndefinedLazy = 0x0005, - - // Other 'desc' flags. - SF_NoDeadStrip = 0x0020, - SF_WeakReference = 0x0040, - SF_WeakDefinition = 0x0080 - }; private: MCAssembler Assembler; MCSectionData *CurSectionData; + /// Track the current atom for each section. + DenseMap<const MCSectionData*, MCSymbolData*> CurrentAtomMap; + private: MCFragment *getCurrentFragment() const { assert(CurSectionData && "No current section!"); @@ -64,10 +49,20 @@ private: MCDataFragment *getOrCreateDataFragment() const { MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); if (!F) - F = new MCDataFragment(CurSectionData); + F = createDataFragment(); return F; } + /// Create a new data fragment in the current section. 
+ MCDataFragment *createDataFragment() const { + MCDataFragment *DF = new MCDataFragment(CurSectionData); + DF->setAtom(CurrentAtomMap.lookup(CurSectionData)); + return DF; + } + + void EmitInstToFragment(const MCInst &Inst); + void EmitInstToData(const MCInst &Inst); + public: MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter) @@ -114,6 +109,18 @@ public: virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EndCOFFSymbolDef() { + assert(0 && "macho doesn't support this directive"); + } virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { assert(0 && "macho doesn't support this directive"); } @@ -122,6 +129,8 @@ public: } virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); virtual void EmitGPRel32Value(const MCExpr *Value) { @@ -134,14 +143,14 @@ public: unsigned MaxBytesToEmit = 0); virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); - + virtual void EmitFileDirective(StringRef Filename) { - errs() << "FIXME: MCMachoStreamer:EmitFileDirective not implemented\n"; + report_fatal_error("unsupported directive: '.file'"); } virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { - errs() << "FIXME: MCMachoStreamer:EmitDwarfFileDirective not implemented\n"; + report_fatal_error("unsupported directive: '.file'"); } - + virtual void EmitInstruction(const MCInst &Inst); virtual void Finish(); @@ -152,7 +161,7 @@ public: void MCMachOStreamer::SwitchSection(const MCSection *Section) { assert(Section && "Cannot switch to a null section!"); - + // If already in this section, then this is a noop. if (Section == CurSection) return; @@ -162,20 +171,39 @@ void MCMachOStreamer::SwitchSection(const MCSection *Section) { void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(CurSection && "Cannot emit before setting section!"); + + MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + + // Update the current atom map, if necessary. + bool MustCreateFragment = false; + if (Assembler.isSymbolLinkerVisible(&SD)) { + CurrentAtomMap[CurSectionData] = &SD; + + // We have to create a new fragment, fragments cannot span atoms. + MustCreateFragment = true; + } // FIXME: This is wasteful, we don't necessarily need to create a data // fragment. Instead, we should mark the symbol as pointing into the data // fragment if it exists, otherwise we should just queue the label and set its // fragment pointer when we emit the next fragment. - MCDataFragment *F = getOrCreateDataFragment(); - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + MCDataFragment *F = + MustCreateFragment ? 
createDataFragment() : getOrCreateDataFragment(); assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); SD.setFragment(F); SD.setOffset(F->getContents().size()); - // This causes the reference type and weak reference flags to be cleared. - SD.setFlags(SD.getFlags() & ~(SF_WeakReference | SF_ReferenceTypeMask)); - + // This causes the reference type flag to be cleared. Darwin 'as' was "trying" + // to clear the weak reference and weak definition bits too, but the + // implementation was buggy. For now we just try to match 'as', for + // diffability. + // + // FIXME: Cleanup this code, these bits should be emitted based on semantic + // properties, not on the order of definition, etc. + SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask); + Symbol->setSection(*CurSection); } @@ -190,13 +218,9 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { } void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { - // Only absolute symbols can be redefined. - assert((Symbol->isUndefined() || Symbol->isAbsolute()) && - "Cannot define a symbol twice!"); - // FIXME: Lift context changes into super class. - // FIXME: Set associated section. - Symbol->setValue(AddValueSymbols(Value)); + Assembler.getOrCreateSymbolData(*Symbol); + Symbol->setVariableValue(AddValueSymbols(Value)); } void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, @@ -243,6 +267,13 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_Global: SD.setExternal(true); + // This effectively clears the undefined lazy bit, in Darwin 'as', although + // it isn't very consistent because it implements this as part of symbol + // lookup. + // + // FIXME: Cleanup this code, these bits should be emitted based on semantic + // properties, not on the order of definition, etc. + SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy); break; case MCSA_LazyReference: @@ -280,7 +311,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { // Encode the 'desc' value into the lowest implementation defined bits. - assert(DescValue == (DescValue & SF_DescFlagsMask) && + assert(DescValue == (DescValue & SF_DescFlagsMask) && "Invalid .desc value!"); Assembler.getOrCreateSymbolData(*Symbol).setFlags(DescValue&SF_DescFlagsMask); } @@ -309,8 +340,14 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); - MCFragment *F = new MCZeroFillFragment(Size, ByteAlignment, &SectData); + // Emit an align fragment if necessary. + if (ByteAlignment != 1) + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData); + + MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); SD.setFragment(F); + if (Assembler.isSymbolLinkerVisible(&SD)) + F->setAtom(&SD); Symbol->setSection(*Section); @@ -319,6 +356,14 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, SectData.setAlignment(ByteAlignment); } +// This should always be called with the thread local bss section. Like the +// .zerofill directive this doesn't actually switch sections on us. 
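Stepping back to the atom tracking introduced in EmitLabel above, the picture is roughly this (a sketch, not LLVM API; the symbol names are invented):

    // Section __text after assembling:
    //   _foo:      <- linker visible: starts atom 1; EmitLabel forces a fresh
    //      ...        fragment, since a fragment may not span two atoms
    //   Ltmp0:     <- assembler temporary: stays inside _foo's atom
    //   _bar:      <- linker visible: starts atom 2 (fresh fragment again)
    // Each fragment's atom is thus the nearest preceding linker-visible
    // symbol, and EvaluateFixup uses it as the base for PC-relative fixups.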
+void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + EmitZerofill(Section, Symbol, Size, ByteAlignment); + return; +} + void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); } @@ -334,8 +379,9 @@ void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size, for (unsigned i = 0; i != Size; ++i) DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); } else { - DF->addFixup(MCAsmFixup(DF->getContents().size(), *AddValueSymbols(Value), - MCFixup::getKindForSize(Size))); + DF->addFixup(MCFixup::Create(DF->getContents().size(), + AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); DF->getContents().resize(DF->getContents().size() + Size, 0); } } @@ -345,8 +391,9 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, - false /* EmitNops */, CurSectionData); + MCFragment *F = new MCAlignFragment(ByteAlignment, Value, ValueSize, + MaxBytesToEmit, CurSectionData); + F->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Update the maximum alignment on the current section if necessary. if (ByteAlignment > CurSectionData->getAlignment()) @@ -357,8 +404,10 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, - true /* EmitNops */, CurSectionData); + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + CurSectionData); + F->setEmitNops(true); + F->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Update the maximum alignment on the current section if necessary. if (ByteAlignment > CurSectionData->getAlignment()) @@ -367,19 +416,30 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - new MCOrgFragment(*Offset, Value, CurSectionData); + MCFragment *F = new MCOrgFragment(*Offset, Value, CurSectionData); + F->setAtom(CurrentAtomMap.lookup(CurSectionData)); } -void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { - // Scan for values. - for (unsigned i = 0; i != Inst.getNumOperands(); ++i) - if (Inst.getOperand(i).isExpr()) - AddValueSymbols(Inst.getOperand(i).getExpr()); +void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) { + MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData); + IF->setAtom(CurrentAtomMap.lookup(CurSectionData)); - CurSectionData->setHasInstructions(true); + // Add the fixups and data. + // + // FIXME: Revisit this design decision when relaxation is done, we may be + // able to get away with not storing any extra data in the MCInst. + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); - // FIXME-PERF: Common case is that we don't need to relax, encode directly - // onto the data fragments buffers. 
+ IF->getCode() = Code; + IF->getFixups() = Fixups; +} + +void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { + MCDataFragment *DF = getOrCreateDataFragment(); SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; @@ -387,47 +447,41 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); VecOS.flush(); - // FIXME: Eliminate this copy. - SmallVector<MCAsmFixup, 4> AsmFixups; + // Add the fixups and data. for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - MCFixup &F = Fixups[i]; - AsmFixups.push_back(MCAsmFixup(F.getOffset(), *F.getValue(), - F.getKind())); + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); } + DF->getContents().append(Code.begin(), Code.end()); +} - // See if we might need to relax this instruction, if so it needs its own - // fragment. - // - // FIXME-PERF: Support target hook to do a fast path that avoids the encoder, - // when we can immediately tell that we will get something which might need - // relaxation (and compute its size). - // - // FIXME-PERF: We should also be smart about immediately relaxing instructions - // which we can already show will never possibly fit (we can also do a very - // good job of this before we do the first relaxation pass, because we have - // total knowledge about undefined symbols at that point). Even now, though, - // we can do a decent job, especially on Darwin where scattering means that we - // are going to often know that we can never fully resolve a fixup. - if (Assembler.getBackend().MayNeedRelaxation(Inst, AsmFixups)) { - MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData); - - // Add the fixups and data. - // - // FIXME: Revisit this design decision when relaxation is done, we may be - // able to get away with not storing any extra data in the MCInst. - IF->getCode() = Code; - IF->getFixups() = AsmFixups; +void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { + // Scan for values. + for (unsigned i = Inst.getNumOperands(); i--; ) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + CurSectionData->setHasInstructions(true); + + // If this instruction doesn't need relaxation, just emit it as data. + if (!Assembler.getBackend().MayNeedRelaxation(Inst)) { + EmitInstToData(Inst); return; } - // Add the fixups and data. - MCDataFragment *DF = getOrCreateDataFragment(); - for (unsigned i = 0, e = AsmFixups.size(); i != e; ++i) { - AsmFixups[i].Offset += DF->getContents().size(); - DF->addFixup(AsmFixups[i]); + // Otherwise, if we are relaxing everything, relax the instruction as much as + // possible and emit it as data. + if (Assembler.getRelaxAll()) { + MCInst Relaxed; + Assembler.getBackend().RelaxInstruction(Inst, Relaxed); + while (Assembler.getBackend().MayNeedRelaxation(Relaxed)) + Assembler.getBackend().RelaxInstruction(Relaxed, Relaxed); + EmitInstToData(Relaxed); + return; } - DF->getContents().append(Code.begin(), Code.end()); + + // Otherwise emit to a separate fragment. 
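 // (A condensed restatement of the dispatch above, using only names from
 //  this patch: instructions the backend says cannot relax are encoded
 //  straight into the current data fragment via EmitInstToData(); with
 //  RelaxAll set they are first relaxed to a fixed point and then emitted
 //  as data; only the remaining, possibly-growing instructions fall through
 //  to the EmitInstToFragment(Inst) call below and get their own
 //  MCInstFragment.)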
+ EmitInstToFragment(Inst); } void MCMachOStreamer::Finish() { diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 5f0c64a..5332ade 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -42,6 +42,12 @@ namespace { virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){} virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + virtual void EmitCOFFSymbolStorageClass(int StorageClass) {} + virtual void EmitCOFFSymbolType(int Type) {} + virtual void EndCOFFSymbolDef() {} + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} @@ -49,7 +55,8 @@ namespace { virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0) {} - + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} virtual void EmitValue(const MCExpr *Value, unsigned Size, diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 1183312..1cbe09a 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -132,11 +132,6 @@ AsmToken AsmLexer::LexLineComment() { /// Decimal integer: [1-9][0-9]* /// TODO: FP literal. AsmToken AsmLexer::LexDigit() { - if (*CurPtr == ':') - return ReturnError(TokStart, "FIXME: local label not implemented"); - if (*CurPtr == 'f' || *CurPtr == 'b') - return ReturnError(TokStart, "FIXME: directional label not implemented"); - // Decimal integer: [1-9][0-9]* if (CurPtr[-1] != '0') { while (isdigit(*CurPtr)) @@ -158,6 +153,12 @@ AsmToken AsmLexer::LexDigit() { if (*CurPtr == 'b') { ++CurPtr; + // See if we actually have "0b" as part of something like "jmp 0b\n" + if (!isdigit(CurPtr[0])) { + --CurPtr; + StringRef Result(TokStart, CurPtr - TokStart); + return AsmToken(AsmToken::Integer, Result, 0); + } const char *NumStart = CurPtr; while (CurPtr[0] == '0' || CurPtr[0] == '1') ++CurPtr; @@ -280,6 +281,7 @@ AsmToken AsmLexer::LexToken() { case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); + case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); case '=': if (*CurPtr == '=') return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index a63d2e4..4523eab 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCParser/AsmParser.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -189,6 +190,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@'); MCSymbol *Sym = CreateSymbol(Split.first); + // Mark the symbol as used in an expression. + Sym->setUsedInExpr(true); + // Lookup the symbol variant if used. 
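 // (Illustrative example, assumed input rather than anything from this
 //  patch: for an operand written "_foo@GOTPCREL", the split('@') above
 //  yields Split.first == "_foo" and Split.second == "GOTPCREL"; the symbol
 //  is created from the bare name, and the suffix selects the variant
 //  looked up below, e.g. MCSymbolRefExpr::VK_GOTPCREL.)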
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
 if (Split.first.size() != getTok().getIdentifier().size())
@@ -199,11 +203,11 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
 
 // If this is an absolute variable reference, substitute it now to preserve
 // semantics in the face of reassignment.
- if (Sym->getValue() && isa<MCConstantExpr>(Sym->getValue())) {
+ if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
 if (Variant)
 return Error(EndLoc, "unexpected modifier on variable reference");
 
- Res = Sym->getValue();
+ Res = Sym->getVariableValue();
 return false;
 }
 
@@ -211,11 +215,28 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
 Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
 return false;
 }
- case AsmToken::Integer:
- Res = MCConstantExpr::Create(getTok().getIntVal(), getContext());
+ case AsmToken::Integer: {
+ SMLoc Loc = getTok().getLoc();
+ int64_t IntVal = getTok().getIntVal();
+ Res = MCConstantExpr::Create(IntVal, getContext());
 EndLoc = Lexer.getLoc();
 Lex(); // Eat token.
+ // Look for 'b' or 'f' following an Integer as a directional label
+ if (Lexer.getKind() == AsmToken::Identifier) {
+ StringRef IDVal = getTok().getString();
+ if (IDVal == "f" || IDVal == "b") {
+ MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
+ IDVal == "f" ? 1 : 0);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ getContext());
+ if (IDVal == "b" && Sym->isUndefined())
+ return Error(Loc, "invalid reference to undefined symbol");
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat identifier.
+ }
+ }
 return false;
+ }
 case AsmToken::Dot: {
 // This is a '.' reference, which references the current PC.  Emit a
 // temporary label to the streamer and refer to it.
@@ -411,6 +432,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
 /// ::= Label* Identifier OperandList* EndOfStatement
 bool AsmParser::ParseStatement() {
 if (Lexer.is(AsmToken::EndOfStatement)) {
+ Out.AddBlankLine();
 Lex();
 return false;
 }
 
 AsmToken ID = getTok();
 SMLoc IDLoc = ID.getLoc();
 StringRef IDVal;
- if (ParseIdentifier(IDVal)) {
+ int64_t LocalLabelVal = -1;
+ // Allow an integer followed by a ':' as a directional local label.
+ if (Lexer.is(AsmToken::Integer)) {
+ LocalLabelVal = getTok().getIntVal();
+ if (LocalLabelVal < 0) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ }
+ else {
+ IDVal = getTok().getString();
+ Lex(); // Consume the integer token to be used as an identifier token.
+ if (Lexer.getKind() != AsmToken::Colon) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ }
+ }
+ }
+ else if (ParseIdentifier(IDVal)) {
 if (!TheCondState.Ignore)
 return TokError("unexpected token at start of statement");
 IDVal = "";
@@ -456,13 +496,25 @@ bool AsmParser::ParseStatement() {
 // FIXME: Diagnostics. Note the location of the definition as a label.
 // FIXME: This doesn't diagnose assignment to a symbol which has been
 // implicitly marked as external.
- MCSymbol *Sym = CreateSymbol(IDVal);
- if (!Sym->isUndefined())
+ MCSymbol *Sym;
+ if (LocalLabelVal == -1)
+ Sym = CreateSymbol(IDVal);
+ else
+ Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
+ if (!Sym->isUndefined() || Sym->isVariable())
 return Error(IDLoc, "invalid symbol redefinition");
 
 // Emit the label. 
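 // (A worked example of the directional-label syntax these hunks add; the
 //  assembly input is assumed, not taken from the patch:
 //
 //      1:            # defines local label 1 (CreateDirectionalLocalSymbol(1))
 //        jmp 1b      # "1b" binds to the nearest '1:' before it, and must
 //                    # already be defined, hence the error reported above
 //        jmp 1f      # "1f" binds to the next '1:' after it
 //      1:
 //
 //  "Nb" and "Nf" resolve through GetDirectionalLocalSymbol(N, 0) and
 //  GetDirectionalLocalSymbol(N, 1) respectively.)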
Out.EmitLabel(Sym); + // Consume any end of statement token, if present, to avoid spurious + // AddBlankLine calls(). + if (Lexer.is(AsmToken::EndOfStatement)) { + Lex(); + if (Lexer.is(AsmToken::Eof)) + return false; + } + return ParseStatement(); } @@ -620,6 +672,16 @@ bool AsmParser::ParseStatement() { return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs", MCSectionMachO::S_CSTRING_LITERALS); + if (IDVal == ".tdata") + return ParseDirectiveSectionSwitch("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR); + if (IDVal == ".tlv") + return ParseDirectiveSectionSwitch("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES); + if (IDVal == ".thread_init_func") + return ParseDirectiveSectionSwitch("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + // Assembler features if (IDVal == ".set") return ParseDirectiveSet(); @@ -686,6 +748,8 @@ bool AsmParser::ParseStatement() { return ParseDirectiveSymbolAttribute(MCSA_Protected); if (IDVal == ".reference") return ParseDirectiveSymbolAttribute(MCSA_Reference); + if (IDVal == ".type") + return ParseDirectiveELFType(); if (IDVal == ".weak") return ParseDirectiveSymbolAttribute(MCSA_Weak); if (IDVal == ".weak_definition") @@ -703,6 +767,8 @@ bool AsmParser::ParseStatement() { return ParseDirectiveDarwinSymbolDesc(); if (IDVal == ".lsym") return ParseDirectiveDarwinLsym(); + if (IDVal == ".tbss") + return ParseDirectiveDarwinTBSS(); if (IDVal == ".subsections_via_symbols") return ParseDirectiveDarwinSubsectionsViaSymbols(); @@ -729,8 +795,13 @@ bool AsmParser::ParseStatement() { return false; } + // Canonicalize the opcode to lower case. + SmallString<128> Opcode; + for (unsigned i = 0, e = IDVal.size(); i != e; ++i) + Opcode.push_back(tolower(IDVal[i])); + SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; - bool HadError = getTargetParser().ParseInstruction(IDVal, IDLoc, + bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc, ParsedOperands); if (!HadError && Lexer.isNot(AsmToken::EndOfStatement)) HadError = TokError("unexpected token in argument list"); @@ -786,11 +857,13 @@ bool AsmParser::ParseAssignment(const StringRef &Name) { // // FIXME: Diagnostics. Note the location of the definition as a label. // FIXME: Diagnose assignment to protected identifier (e.g., register name). - if (!Sym->isUndefined() && !Sym->isAbsolute()) + if (Sym->isUndefined() && !Sym->isUsedInExpr()) + ; // Allow redefinitions of undefined symbols only used in directives. + else if (!Sym->isUndefined() && !Sym->isAbsolute()) return Error(EqualLoc, "redefinition of '" + Name + "'"); else if (!Sym->isVariable()) return Error(EqualLoc, "invalid assignment to '" + Name + "'"); - else if (!isa<MCConstantExpr>(Sym->getValue())) + else if (!isa<MCConstantExpr>(Sym->getVariableValue())) return Error(EqualLoc, "invalid reassignment of non-absolute variable '" + Name + "'"); } else @@ -798,6 +871,8 @@ bool AsmParser::ParseAssignment(const StringRef &Name) { // FIXME: Handle '.'. + Sym->setUsedInExpr(true); + // Do the assignment. Out.EmitAssignment(Sym, Value); @@ -1008,7 +1083,11 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) { if (ParseExpression(Value)) return true; - Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE); + // Special case constant expressions to match code generator. 
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) + Out.EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); + else + Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE); if (Lexer.is(AsmToken::EndOfStatement)) break; @@ -1090,8 +1169,7 @@ bool AsmParser::ParseDirectiveFill() { return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); for (uint64_t i = 0, e = NumValues; i != e; ++i) - Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), FillSize, - DEFAULT_ADDRSPACE); + Out.EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); return false; } @@ -1169,10 +1247,8 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { Lex(); - if (!HasFillExpr) { - // FIXME: Sometimes fill with nop. + if (!HasFillExpr) FillExpr = 0; - } // Compute alignment in bytes. if (IsPow2) { @@ -1200,14 +1276,21 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { } } - // FIXME: hard code the parser to use EmitCodeAlignment for text when using - // the TextAlignFillValue. - if(Out.getCurrentSection()->getKind().isText() && - Lexer.getMAI().getTextAlignFillValue() == FillExpr) + // Check whether we should use optimal code alignment for this .align + // directive. + // + // FIXME: This should be using a target hook. + bool UseCodeAlign = false; + if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>( + Out.getCurrentSection())) + UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && + ValueSize == 1 && UseCodeAlign) { Out.EmitCodeAlignment(Alignment, MaxBytesToFill); - else + } else { // FIXME: Target specific behavior about how the "extra" bytes are filled. Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); + } return false; } @@ -1239,6 +1322,52 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { return false; } +/// ParseDirectiveELFType +/// ::= .type identifier , @attribute +bool AsmParser::ParseDirectiveELFType() { + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. 
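 // (Illustrative input, assumed rather than taken from the patch: a line
 //  such as
 //      .type _main, @function
 //  reaches this point with Name == "_main"; the Type identifier parsed
 //  below ("function") is then mapped by the StringSwitch to
 //  MCSA_ELF_TypeFunction.)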
+ MCSymbol *Sym = CreateSymbol(Name); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in '.type' directive"); + Lex(); + + if (Lexer.isNot(AsmToken::At)) + return TokError("expected '@' before type"); + Lex(); + + StringRef Type; + SMLoc TypeLoc; + + TypeLoc = Lexer.getLoc(); + if (ParseIdentifier(Type)) + return TokError("expected symbol type in directive"); + + MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type) + .Case("function", MCSA_ELF_TypeFunction) + .Case("object", MCSA_ELF_TypeObject) + .Case("tls_object", MCSA_ELF_TypeTLS) + .Case("common", MCSA_ELF_TypeCommon) + .Case("notype", MCSA_ELF_TypeNoType) + .Default(MCSA_Invalid); + + if (Attr == MCSA_Invalid) + return Error(TypeLoc, "unsupported attribute in '.type' directive"); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.type' directive"); + + Lex(); + + Out.EmitSymbolAttribute(Sym, Attr); + + return false; +} + /// ParseDirectiveDarwinSymbolDesc /// ::= .desc identifier , expression bool AsmParser::ParseDirectiveDarwinSymbolDesc() { @@ -1316,7 +1445,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " "be less than zero"); - // NOTE: The alignment in the directive is a power of 2 value, the assember + // NOTE: The alignment in the directive is a power of 2 value, the assembler // may internally end up wanting an alignment in bytes. // FIXME: Diagnose overflow. if (Pow2Alignment < 0) @@ -1344,22 +1473,18 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { /// ::= .zerofill segname , sectname [, identifier , size_expression [ /// , align_expression ]] bool AsmParser::ParseDirectiveDarwinZerofill() { - // FIXME: Handle quoted names here. - - if (Lexer.isNot(AsmToken::Identifier)) + StringRef Segment; + if (ParseIdentifier(Segment)) return TokError("expected segment name after '.zerofill' directive"); - StringRef Segment = getTok().getString(); - Lex(); if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); - - if (Lexer.isNot(AsmToken::Identifier)) + + StringRef Section; + if (ParseIdentifier(Section)) return TokError("expected section name after comma in '.zerofill' " "directive"); - StringRef Section = getTok().getString(); - Lex(); // If this is the end of the line all that was wanted was to create the // the section but with no symbol. @@ -1375,13 +1500,13 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { return TokError("unexpected token in directive"); Lex(); - if (Lexer.isNot(AsmToken::Identifier)) + SMLoc IDLoc = Lexer.getLoc(); + StringRef IDStr; + if (ParseIdentifier(IDStr)) return TokError("expected identifier in directive"); // handle the identifier as the key symbol. - SMLoc IDLoc = Lexer.getLoc(); - MCSymbol *Sym = CreateSymbol(getTok().getString()); - Lex(); + MCSymbol *Sym = CreateSymbol(IDStr); if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); @@ -1410,7 +1535,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " "than zero"); - // NOTE: The alignment in the directive is a power of 2 value, the assember + // NOTE: The alignment in the directive is a power of 2 value, the assembler // may internally end up wanting an alignment in bytes. // FIXME: Diagnose overflow. 
if (Pow2Alignment < 0)
@@ -1431,6 +1556,60 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
 return false;
 }
 
+/// ParseDirectiveDarwinTBSS
+/// ::= .tbss identifier, size, align
+bool AsmParser::ParseDirectiveDarwinTBSS() {
+ SMLoc IDLoc = Lexer.getLoc();
+ StringRef Name;
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = CreateSymbol(Name);
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = Lexer.getLoc();
+ if (ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (Lexer.is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = Lexer.getLoc();
+ if (ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.tbss' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than "
+ "zero");
+
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less "
+ "than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ Out.EmitTBSSSymbol(Ctx.getMachOSection("__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ 0, SectionKind::getThreadBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
 /// ParseDirectiveDarwinSubsectionsViaSymbols
 /// ::= .subsections_via_symbols
 bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() {
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index f6e9636..a792d56 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -20,30 +20,3 @@ using namespace llvm;
 MCSection::~MCSection() {
 }
 
-//===----------------------------------------------------------------------===//
-// MCSectionCOFF
-//===----------------------------------------------------------------------===//
-
-MCSectionCOFF *MCSectionCOFF::
-Create(StringRef Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
- char *NameCopy = static_cast<char*>(
- Ctx.Allocate(Name.size(), /*Alignment=*/1));
- memcpy(NameCopy, Name.data(), Name.size());
- return new (Ctx) MCSectionCOFF(StringRef(NameCopy, Name.size()),
- IsDirective, K);
-}
-
-void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const {
-
- if (isDirective()) {
- OS << getName() << '\n';
- return;
- }
- OS << "\t.section\t" << getName() << ",\"";
- if (getKind().isText())
- OS << 'x';
- if (getKind().isWriteable())
- OS << 'w';
- OS << "\"\n";
-}
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
new file mode 100644
index 0000000..d57bb0c
--- /dev/null
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -0,0 +1,76 @@
+//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSectionCOFF::~MCSectionCOFF() {} // anchor. 
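 // (For orientation, a sketch of the output PrintSwitchToSection below
 //  produces for a hypothetical writable COMDAT section ".data$sym" created
 //  with IMAGE_COMDAT_SELECT_ANY; the section name is assumed purely for
 //  illustration:
 //
 //      .section        .data$sym,"w"
 //      .linkonce discard
 //
 //  'x' marks text sections, 'w' writable ones ('r' otherwise), and a
 //  trailing 'n' discardable ones.)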
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name.
+bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" || Name == ".bss")
+ return true;
+
+ return false;
+}
+
+void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ // standard sections don't require the '.section'
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ OS << "\t.section\t" << getSectionName() << ",\"";
+ if (getKind().isText())
+ OS << 'x';
+ if (getKind().isWriteable())
+ OS << 'w';
+ else
+ OS << 'r';
+ if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE)
+ OS << 'n';
+ OS << "\"\n";
+
+ if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) {
+ switch (Selection) {
+ case IMAGE_COMDAT_SELECT_NODUPLICATES:
+ OS << "\t.linkonce one_only\n";
+ break;
+ case IMAGE_COMDAT_SELECT_ANY:
+ OS << "\t.linkonce discard\n";
+ break;
+ case IMAGE_COMDAT_SELECT_SAME_SIZE:
+ OS << "\t.linkonce same_size\n";
+ break;
+ case IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ OS << "\t.linkonce same_contents\n";
+ break;
+ // NOTE: as of binutils 2.20, there is no way to specify select largest
+ // with the .linkonce directive. For now, we treat it as an invalid
+ // comdat selection value.
+ case IMAGE_COMDAT_SELECT_LARGEST:
+ // OS << "\t.linkonce largest\n";
+ // break;
+ default:
+ assert(0 && "unsupported COFF selection type");
+ break;
+ }
+ }
+}
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index 3a18cee..ded3b20 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -34,7 +34,14 @@ static const struct {
 { "interposing", "S_INTERPOSING" }, // 0x0D
 { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
 { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
- { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" } // 0x10
+ { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
+ { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11
+ { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12
+ { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13
+ { "thread_local_variable_pointers",
+ "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14
+ { "thread_local_init_function_pointers",
+ "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15
 };
 
@@ -66,7 +73,7 @@ ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
 MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
 unsigned TAA, unsigned reserved2, SectionKind K)
- : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) {
 assert(Segment.size() <= 16 && Section.size() <= 16 &&
 "Segment or section string too long");
 for (unsigned i = 0; i != 16; ++i) {
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 4f484a2..573f2a3 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -48,7 +48,7 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
 EmitValue(E, 1, AddrSpace);
 }
 
-/// EmitRawText - If this file is backed by a assembly streamer, this dumps
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
 /// the specified string in the output .s file. This capability is
 /// indicated by the hasRawTextSupport() predicate. 
void MCStreamer::EmitRawText(StringRef String) { diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 3fb1233..07751f7 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -38,6 +39,17 @@ static bool NameNeedsQuoting(StringRef Str) { return false; } +void MCSymbol::setVariableValue(const MCExpr *Value) { + assert(Value && "Invalid variable value!"); + assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) && + "Invalid redefinition!"); + this->Value = Value; + + // Mark the variable as absolute as appropriate. + if (isa<MCConstantExpr>(Value)) + setAbsolute(); +} + void MCSymbol::print(raw_ostream &OS) const { // The name for this MCSymbol is required to be a valid target name. However, // some targets support quoting names with funny characters. If the name diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index a533ccf..3207e99 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachO.h" @@ -58,6 +59,20 @@ static bool isFixupKindRIPRel(unsigned Kind) { Kind == X86::reloc_riprel_4byte_movq_load; } +static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { + // Undefined symbols are always extern. + if (SD->Symbol->isUndefined()) + return true; + + // References to weak definitions require external relocation entries; the + // definition may not always be the one in the same object file. + if (SD->getFlags() & SF_WeakDefinition) + return true; + + // Otherwise, we can use an internal relocation. + return false; +} + namespace { class MachObjectWriterImpl { @@ -130,7 +145,8 @@ class MachObjectWriterImpl { RIT_Pair = 1, RIT_Difference = 2, RIT_PreboundLazyPointer = 3, - RIT_LocalDifference = 4 + RIT_LocalDifference = 4, + RIT_TLV = 5 }; /// X86_64 uses its own relocation types. @@ -143,7 +159,8 @@ class MachObjectWriterImpl { RIT_X86_64_Subtractor = 5, RIT_X86_64_Signed1 = 6, RIT_X86_64_Signed2 = 7, - RIT_X86_64_Signed4 = 8 + RIT_X86_64_Signed4 = 8, + RIT_X86_64_TLV = 9 }; /// MachSymbolData - Helper struct for containing some precomputed information @@ -155,8 +172,8 @@ class MachObjectWriterImpl { // Support lexicographic sorting. bool operator<(const MachSymbolData &RHS) const { - const std::string &Name = SymbolData->getSymbol().getName(); - return Name < RHS.SymbolData->getSymbol().getName(); + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); } }; @@ -170,6 +187,7 @@ class MachObjectWriterImpl { llvm::DenseMap<const MCSectionData*, std::vector<MachRelocationEntry> > Relocations; + llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase; /// @} /// @name Symbol Table Data @@ -289,9 +307,7 @@ public: uint64_t Start = OS.tell(); (void) Start; - // FIXME: cast<> support! 
- const MCSectionMachO &Section =
- static_cast<const MCSectionMachO&>(SD.getSection());
+ const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
 WriteBytes(Section.getSectionName(), 16);
 WriteBytes(Section.getSegmentName(), 16);
 if (Is64Bit) {
@@ -312,7 +328,7 @@ public:
 Write32(NumRelocations ? RelocationsStart : 0);
 Write32(NumRelocations);
 Write32(Flags);
- Write32(0); // reserved1
+ Write32(IndirectSymBase.lookup(&SD)); // reserved1
 Write32(Section.getStubSize()); // reserved2
 if (Is64Bit)
 Write32(0); // reserved3
@@ -404,7 +420,7 @@ public:
 // Compute the symbol address.
 if (Symbol.isDefined()) {
 if (Symbol.isAbsolute()) {
- llvm_unreachable("FIXME: Not yet implemented!");
+ Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
 } else {
 Address = Layout.getSymbolAddress(&Data);
 }
@@ -456,14 +472,17 @@ public:
 void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
 const MCFragment *Fragment,
- const MCAsmFixup &Fixup, MCValue Target,
+ const MCFixup &Fixup, MCValue Target,
 uint64_t &FixedValue) {
- unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
- unsigned IsRIPRel = isFixupKindRIPRel(Fixup.Kind);
- unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+ unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+ unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
 
 // See <reloc.h>.
- uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
+ uint32_t FixupOffset =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ uint32_t FixupAddress =
+ Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
 int64_t Value = 0;
 unsigned Index = 0;
 unsigned IsExtern = 0;
@@ -532,7 +551,7 @@ public:
 Type = RIT_X86_64_Unsigned;
 
 MachRelocationEntry MRE;
- MRE.Word0 = Address;
+ MRE.Word0 = FixupOffset;
 MRE.Word1 = ((Index << 0) |
 (IsPCRel << 24) |
 (Log2Size << 25) |
@@ -548,6 +567,17 @@ public:
 MCSymbolData &SD = Asm.getSymbolData(*Symbol);
 const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
 
+ // Relocations inside debug sections always use local relocations when
+ // possible. This seems to be done because the debugger doesn't fully
+ // understand x86_64 relocation entries, and expects to find values that
+ // have already been fixed up.
+ if (Symbol->isInSection()) {
+ const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+ Fragment->getParent()->getSection());
+ if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+ Base = 0;
+ }
+
 // x86_64 almost always uses external relocations, except when there is no
 // symbol to use as a base address (a local symbol with no preceding
 // non-local symbol).
@@ -558,14 +588,17 @@ public:
 // Add the local offset, if needed.
 if (Base != &SD)
 Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
- } else {
+ } else if (Symbol->isInSection()) {
 // The index is the section ordinal (1-based).
 Index = SD.getFragment()->getParent()->getOrdinal() + 1;
 IsExtern = 0;
 Value += Layout.getSymbolAddress(&SD);
 
 if (IsPCRel)
- Value -= Address + (1 << Log2Size);
+ Value -= FixupAddress + (1 << Log2Size);
+ } else {
+ report_fatal_error("unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
 }
 
 MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
@@ -575,14 +608,37 @@ public:
 // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
 // rewrite the movq to an leaq at link time if the symbol ends up in
 // the same linkage unit. 
- if (unsigned(Fixup.Kind) == X86::reloc_riprel_4byte_movq_load) + if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) Type = RIT_X86_64_GOTLoad; else Type = RIT_X86_64_GOT; - } else if (Modifier != MCSymbolRefExpr::VK_None) + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + Type = RIT_X86_64_TLV; + } else if (Modifier != MCSymbolRefExpr::VK_None) { report_fatal_error("unsupported symbol modifier in relocation"); - else + } else { Type = RIT_X86_64_Signed; + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L<foo> + <constant>) which is outside the atom + // containing L<foo>. Generally, this shouldn't occur but it does + // happen when we have a RIPrel instruction with data following the + // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel + // adjustment Darwin x86_64 uses, the offset is still negative and + // the linker has no way to recognize this. + // + // To work around this, Darwin uses several special relocation types + // to indicate the offsets. However, the specification or + // implementation of these seems to also be incomplete; they should + // adjust the addend as well based on the actual encoded instruction + // (the additional bias), but instead appear to just look at the + // final offset. + switch (-(Target.getConstant() + (1LL << Log2Size))) { + case 1: Type = RIT_X86_64_Signed1; break; + case 2: Type = RIT_X86_64_Signed2; break; + case 4: Type = RIT_X86_64_Signed4; break; + } + } } else { if (Modifier != MCSymbolRefExpr::VK_None) report_fatal_error("unsupported symbol modifier in branch " @@ -590,27 +646,6 @@ public: Type = RIT_X86_64_Branch; } - - // The Darwin x86_64 relocation format has a problem where it cannot - // encode an address (L<foo> + <constant>) which is outside the atom - // containing L<foo>. Generally, this shouldn't occur but it does happen - // when we have a RIPrel instruction with data following the relocation - // entry (e.g., movb $012, L0(%rip)). Even with the PCrel adjustment - // Darwin x86_64 uses, the offset is still negative and the linker has - // no way to recognize this. - // - // To work around this, Darwin uses several special relocation types to - // indicate the offsets. However, the specification or implementation of - // these seems to also be incomplete; they should adjust the addend as - // well based on the actual encoded instruction (the additional bias), - // but instead appear to just look at the final offset. - if (IsRIPRel) { - switch (-(Target.getConstant() + (1LL << Log2Size))) { - case 1: Type = RIT_X86_64_Signed1; break; - case 2: Type = RIT_X86_64_Signed2; break; - case 4: Type = RIT_X86_64_Signed4; break; - } - } } else { if (Modifier == MCSymbolRefExpr::VK_GOT) { Type = RIT_X86_64_GOT; @@ -621,6 +656,8 @@ public: // required to include any necessary offset directly. 
Type = RIT_X86_64_GOT; IsPCRel = 1; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + report_fatal_error("TLVP symbol modifier should have been rip-rel"); } else if (Modifier != MCSymbolRefExpr::VK_None) report_fatal_error("unsupported symbol modifier in relocation"); else @@ -633,7 +670,7 @@ public: // struct relocation_info (8 bytes) MachRelocationEntry MRE; - MRE.Word0 = Address; + MRE.Word0 = FixupOffset; MRE.Word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | @@ -645,11 +682,11 @@ public: void RecordScatteredRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCAsmFixup &Fixup, MCValue Target, + const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; - unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); - unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); unsigned Type = RIT_Vanilla; // See <reloc.h>. @@ -692,40 +729,49 @@ public: } MachRelocationEntry MRE; - MRE.Word0 = ((Address << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | RF_Scattered); MRE.Word1 = Value; Relocations[Fragment->getParent()].push_back(MRE); } void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCAsmFixup &Fixup, + const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { if (Is64Bit) { RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); return; } - unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); - unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); // If this is a difference or a defined symbol plus an offset, then we need // a scattered relocation entry. + // Differences always require scattered relocations. + if (Target.getSymB()) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); + + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // If this is an internal relocation with an offset, it also needs a + // scattered relocation entry. uint32_t Offset = Target.getConstant(); if (IsPCRel) Offset += 1 << Log2Size; - if (Target.getSymB() || - (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() && - Offset)) { - RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,Target,FixedValue); - return; - } + if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); // See <reloc.h>. - uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); uint32_t Value = 0; unsigned Index = 0; unsigned IsExtern = 0; @@ -739,12 +785,15 @@ public: Type = RIT_Vanilla; Value = 0; } else { - const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - MCSymbolData *SD = &Asm.getSymbolData(*Symbol); - - if (Symbol->isUndefined()) { + // Check whether we need an external or internal relocation. 
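 // (Recap of doesSymbolRequireExternRelocation(), defined earlier in this
 //  file: undefined symbols always need an external entry, as do references
 //  to weak definitions, since the prevailing definition may live in another
 //  object file; everything else can use an internal, section-relative
 //  entry.)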
+ if (doesSymbolRequireExternRelocation(SD)) { IsExtern = 1; Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolAddress(SD); Value = 0; } else { // The index is the section ordinal (1-based). @@ -757,7 +806,7 @@ public: // struct relocation_info (8 bytes) MachRelocationEntry MRE; - MRE.Word0 = Address; + MRE.Word0 = FixupOffset; MRE.Word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | @@ -775,29 +824,37 @@ public: // FIXME: Revisit this when the dust settles. // Bind non lazy symbol pointers first. + unsigned IndirectIndex = 0; for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // FIXME: cast<> support! + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { const MCSectionMachO &Section = - static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + cast<MCSectionMachO>(it->SectionData->getSection()); if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) continue; + // Initialize the section indirect symbol base, if necessary. + if (!IndirectSymBase.count(it->SectionData)) + IndirectSymBase[it->SectionData] = IndirectIndex; + Asm.getOrCreateSymbolData(*it->Symbol); } // Then lazy symbol pointers and symbol stubs. + IndirectIndex = 0; for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // FIXME: cast<> support! + ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) { const MCSectionMachO &Section = - static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + cast<MCSectionMachO>(it->SectionData->getSection()); if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) continue; + // Initialize the section indirect symbol base, if necessary. + if (!IndirectSymBase.count(it->SectionData)) + IndirectSymBase[it->SectionData] = IndirectIndex; + // Set the symbol type to undefined lazy, but only on construction. // // FIXME: Do not hardcode. @@ -1111,7 +1168,7 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCAsmFixup &Fixup, MCValue Target, + const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 50025d2..1341d21 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1382,13 +1382,12 @@ APInt APInt::sqrt() const { // libc sqrt function which will probably use a hardware sqrt computation. // This should be faster than the algorithm below. if (magnitude < 52) { -#if defined( _MSC_VER ) || defined(_MINIX) - // Amazingly, VC++ and Minix don't have round(). 
+#if HAVE_ROUND return APInt(BitWidth, - uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5); + uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); #else return APInt(BitWidth, - uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); + uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5); #endif } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index d31f34e..ae66110 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -1170,7 +1170,9 @@ public: std::string CPU = sys::getHostCPUName(); if (CPU == "generic") CPU = "(unknown)"; OS << ".\n" +#if (ENABLE_TIMESTAMPS == 1) << " Built " << __DATE__ << " (" << __TIME__ << ").\n" +#endif << " Host: " << sys::getHostTriple() << '\n' << " Host CPU: " << CPU << '\n' << '\n' diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 56a171c..7e7ca9d 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Signals.h" #include "llvm/System/Threading.h" #include <cassert> #include <cstdlib> @@ -52,6 +53,12 @@ void llvm::report_fatal_error(const Twine &reason) { } else { ErrorHandler(ErrorHandlerUserData, reason.str()); } + + // If we reached here, we are failing ungracefully. Run the interrupt handlers + // to make sure any special cleanups get done, in particular that we remove + // files registered with RemoveFileOnSignal. + sys::RunInterruptHandlers(); + exit(1); } diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index 68b41a7..7a04a53 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -50,8 +50,8 @@ static void PrintCurStackTrace(raw_ostream &OS) { // Integrate with crash reporter. #ifdef __APPLE__ -extern "C" const char *__crashreporter_info__; -const char *__crashreporter_info__ = 0; +static const char *__crashreporter_info__ = 0; +asm(".desc ___crashreporter_info__, 0x10"); #endif diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 2b262dc..ca0f518 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -23,6 +23,10 @@ static char ascii_tolower(char x) { return x; } +static bool ascii_isdigit(char x) { + return x >= '0' && x <= '9'; +} + /// compare_lower - Compare strings, ignoring case. int StringRef::compare_lower(StringRef RHS) const { for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { @@ -37,6 +41,30 @@ int StringRef::compare_lower(StringRef RHS) const { return Length < RHS.Length ? -1 : 1; } +/// compare_numeric - Compare strings, handle embedded numbers. +int StringRef::compare_numeric(StringRef RHS) const { + for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { + if (Data[I] == RHS.Data[I]) + continue; + if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) { + // The longer sequence of numbers is larger. This doesn't really handle + // prefixed zeros well. + for (size_t J = I+1; J != E+1; ++J) { + bool ld = J < Length && ascii_isdigit(Data[J]); + bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]); + if (ld != rd) + return rd ? -1 : 1; + if (!rd) + break; + } + } + return Data[I] < RHS.Data[I] ? -1 : 1; + } + if (Length == RHS.Length) + return 0; + return Length < RHS.Length ? -1 : 1; +} + // Compute the edit distance between the two given strings. 
unsigned StringRef::edit_distance(llvm::StringRef Other, bool AllowReplacements) { diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 481f6ba..784b77c 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -61,6 +61,10 @@ raw_ostream *llvm::CreateInfoOutputFile() { if (OutputFilename == "-") return new raw_fd_ostream(1, false); // stdout. + // Append mode is used because the info output file is opened and closed + // each time -stats or -time-passes wants to print output to it. To + // compensate for this, the test-suite Makefiles have code to delete the + // info output file before running commands which write to it. std::string Error; raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(), Error, raw_fd_ostream::F_Append); diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp index 21504e9..b3ea013 100644 --- a/lib/Support/Twine.cpp +++ b/lib/Support/Twine.cpp @@ -48,10 +48,10 @@ void Twine::printOneChild(raw_ostream &OS, const void *Ptr, OS << *static_cast<const StringRef*>(Ptr); break; case Twine::DecUIKind: - OS << *static_cast<const unsigned int*>(Ptr); + OS << (unsigned)(uintptr_t)Ptr; break; case Twine::DecIKind: - OS << *static_cast<const int*>(Ptr); + OS << (int)(intptr_t)Ptr; break; case Twine::DecULKind: OS << *static_cast<const unsigned long*>(Ptr); @@ -95,10 +95,10 @@ void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr, << static_cast<const StringRef*>(Ptr) << "\""; break; case Twine::DecUIKind: - OS << "decUI:\"" << *static_cast<const unsigned int*>(Ptr) << "\""; + OS << "decUI:\"" << (unsigned)(uintptr_t)Ptr << "\""; break; case Twine::DecIKind: - OS << "decI:\"" << *static_cast<const int*>(Ptr) << "\""; + OS << "decI:\"" << (int)(intptr_t)Ptr << "\""; break; case Twine::DecULKind: OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\""; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 0b05c54..11cf0ec 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/STLExtras.h" #include <cctype> +#include <cerrno> #include <sys/stat.h> #include <sys/types.h> @@ -399,37 +400,76 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, if (Flags & F_Excl) OpenFlags |= O_EXCL; - FD = open(Filename, OpenFlags, 0664); - if (FD < 0) { - ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; - ShouldClose = false; - } else { - ShouldClose = true; + while ((FD = open(Filename, OpenFlags, 0664)) < 0) { + if (errno != EINTR) { + ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; + ShouldClose = false; + return; + } } + + // Ok, we successfully opened the file, so it'll need to be closed. + ShouldClose = true; } raw_fd_ostream::~raw_fd_ostream() { if (FD < 0) return; flush(); if (ShouldClose) - if (::close(FD) != 0) - error_detected(); + while (::close(FD) != 0) + if (errno != EINTR) { + error_detected(); + break; + } } void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { assert(FD >= 0 && "File already closed."); pos += Size; - if (::write(FD, Ptr, Size) != (ssize_t) Size) - error_detected(); + ssize_t ret; + + do { + ret = ::write(FD, Ptr, Size); + + if (ret < 0) { + // If it's a recoverable error, swallow it and retry the write. + // + // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since + // raw_ostream isn't designed to do non-blocking I/O. 
However, some + // programs, such as old versions of bjam, have mistakenly used + // O_NONBLOCK. For compatibility, emulate blocking semantics by + // spinning until the write succeeds. If you don't want spinning, + // don't use O_NONBLOCK file descriptors with raw_ostream. + if (errno == EINTR || errno == EAGAIN +#ifdef EWOULDBLOCK + || errno == EWOULDBLOCK +#endif + ) + continue; + + // Otherwise it's a non-recoverable error. Note it and quit. + error_detected(); + break; + } + + // The write may have written some or all of the data. Update the + // size and buffer pointer to reflect the remainder that needs + // to be written. If there are no bytes left, we're done. + Ptr += ret; + Size -= ret; + } while (Size > 0); } void raw_fd_ostream::close() { assert(ShouldClose); ShouldClose = false; flush(); - if (::close(FD) != 0) - error_detected(); + while (::close(FD) != 0) + if (errno != EINTR) { + error_detected(); + break; + } FD = -1; } diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index 56bf9e7..9548816 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -152,7 +152,9 @@ static RETSIGTYPE SignalHandler(int Sig) { CallBacksToRun[i].first(CallBacksToRun[i].second); } - +void llvm::sys::RunInterruptHandlers() { + SignalHandler(SIGINT); +} void llvm::sys::SetInterruptFunction(void (*IF)()) { SignalsMutex.acquire(); diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc index f2b72ca..a3a393c 100644 --- a/lib/System/Win32/Signals.inc +++ b/lib/System/Win32/Signals.inc @@ -189,6 +189,10 @@ static void Cleanup() { LeaveCriticalSection(&CriticalSection); } +void llvm::sys::RunInterruptHandlers() { + Cleanup(); +} + static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { try { Cleanup(); diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index b08f942..ae7ae59 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -48,7 +48,7 @@ namespace ARMCC { AL }; - inline static CondCodes getOppositeCondition(CondCodes CC){ + inline static CondCodes getOppositeCondition(CondCodes CC) { switch (CC) { default: llvm_unreachable("Unknown condition code"); case EQ: return NE; @@ -67,7 +67,7 @@ namespace ARMCC { case LE: return GT; } } -} +} // namespace ARMCC inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { switch (CC) { @@ -90,6 +90,10 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } +/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model +/// operations involving sub-registers. 
+bool ModelWithRegSequence(); + FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index b4dec0c..f1e6a9f 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -32,6 +32,8 @@ def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", "ARM v6t2">; def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", "ARM v7A">; +def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", + "ARM v7M">; def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", "Enable VFP2 instructions">; def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", @@ -42,6 +44,10 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", "Enable Thumb2 instructions">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; +def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", + "Enable divide instructions">; +def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", + "Enable Thumb2 extract and pack instructions">; // Some processors have multiply-accumulate instructions that don't // play nicely with other VFP instructions, and it's generally better @@ -123,9 +129,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, - FeatureNEONForFP]>; + FeatureNEONForFP, FeatureT2ExtractPack]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON]>; + [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>; +def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; +def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index a193858..2528854 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" @@ -196,6 +197,42 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return NewMIs[0]; } +bool +ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + DebugLoc DL; + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + bool isKill = true; + + // Add the callee-saved register as live-in unless it's LR and + // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress + // then it's already added to the function and entry block live-in sets. + if (Reg == ARM::LR) { + MachineFunction &MF = *MBB.getParent(); + if (MF.getFrameInfo()->isReturnAddressTaken() && + MF.getRegInfo().isLiveIn(Reg)) + isKill = false; + } + + if (isKill) + MBB.addLiveIn(Reg); + + // Insert the spill to the stack frame. 
The register is killed at the spill + // + storeRegToStackSlot(MBB, MI, Reg, isKill, + CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI); + } + return true; +} + // Branch analysis. bool ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, @@ -481,6 +518,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // If this machine instr is a constant pool entry, its size is recorded as // operand #2. return MI->getOperand(2).getImm(); + case ARM::Int_eh_sjlj_longjmp: + return 16; + case ARM::tInt_eh_sjlj_longjmp: + return 10; case ARM::Int_eh_sjlj_setjmp: case ARM::Int_eh_sjlj_setjmp_nofp: return 24; @@ -540,16 +581,17 @@ bool ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - switch (MI.getOpcode()) { default: break; case ARM::VMOVS: case ARM::VMOVD: case ARM::VMOVDneon: - case ARM::VMOVQ: { + case ARM::VMOVQ: + case ARM::VMOVQQ : { SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); + SrcSubIdx = MI.getOperand(1).getSubReg(); + DstSubIdx = MI.getOperand(0).getSubReg(); return true; } case ARM::MOVr: @@ -564,6 +606,8 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, "Invalid ARM MOV instruction"); SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); + SrcSubIdx = MI.getOperand(1).getSubReg(); + DstSubIdx = MI.getOperand(0).getSubReg(); return true; } } @@ -654,10 +698,8 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. if (DestRC == ARM::tGPRRegisterClass) @@ -679,6 +721,12 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, SrcRC == ARM::QPR_8RegisterClass) SrcRC = ARM::QPRRegisterClass; + // Allow QQPR / QQPR_VFP2 cross-class copies. + if (DestRC == ARM::QQPR_VFP2RegisterClass) + DestRC = ARM::QQPRRegisterClass; + if (SrcRC == ARM::QQPR_VFP2RegisterClass) + SrcRC = ARM::QQPRRegisterClass; + // Disallow copies of unequal sizes. 
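+  // For example, a 16-byte Q register cannot be copied into an 8-byte D
+  // register with a single move instruction.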
if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) return false; @@ -703,20 +751,36 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, Opc = ARM::VMOVDneon; else if (DestRC == ARM::QPRRegisterClass) Opc = ARM::VMOVQ; + else if (DestRC == ARM::QQPRRegisterClass) + Opc = ARM::VMOVQQ; + else if (DestRC == ARM::QQQQPRRegisterClass) + Opc = ARM::VMOVQQQQ; else return false; - AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(SrcReg)); + AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg)); } return true; } +static const +MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, + unsigned Reg, unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) { + if (!SubIdx) + return MIB.addReg(Reg, State); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); + return MIB.addReg(Reg, State, SubIdx); +} + void ARMBaseInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); @@ -738,45 +802,82 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); + } else if (RC == ARM::SPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else { - assert((RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); + } else if (RC == ARM::QPRRegisterClass || + RC == ARM::QPR_VFP2RegisterClass || + RC == ARM::QPR_8RegisterClass) { // FIXME: Neon instructions should support predicates - if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) .addFrameIndex(FI).addImm(128) - .addMemOperand(MMO) - .addReg(SrcReg, getKillRegState(isKill))); + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)). - addReg(SrcReg, getKillRegState(isKill)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)) + .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI) .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } + } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + // FIXME: It's possible to only store part of the QQ register if the + // spilled def has a sub-register index. 
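+      // The aligned case below stores all four D sub-registers of the QQ
+      // register with a single vst2.32; only dsub_0 carries the kill flag.
+      // The unaligned case falls back to a vstm of the four D registers.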
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32)) + .addFrameIndex(FI).addImm(128); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + AddDefaultPred(MIB.addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + } + } else { + assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); } } void ARMBaseInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), MachineMemOperand::MOLoad, 0, @@ -791,20 +892,18 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); + } else if (RC == ARM::SPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else { - assert((RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass || - RC == ARM::QPR_8RegisterClass) && "Unknown regclass!"); - if (Align >= 16 - && (getRegisterInfo().canRealignStack(MF))) { + } else if (RC == ARM::QPRRegisterClass || + RC == ARM::QPR_VFP2RegisterClass || + RC == ARM::QPR_8RegisterClass) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg) .addFrameIndex(FI).addImm(128) .addMemOperand(MMO)); @@ -814,6 +913,40 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } + } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + if (Align >= 
16 && getRegisterInfo().canRealignStack(MF)) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32)); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + } + } else { + assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); } } @@ -930,8 +1063,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } - } - else if (Opc == ARM::VMOVD) { + } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -957,6 +1089,56 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } + } else if (Opc == ARM::VMOVQ) { + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + if (MFI.getObjectAlignment(FI) >= 16 && + getRegisterInfo().canRealignStack(MF)) { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q)) + .addFrameIndex(FI).addImm(128) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) + .addImm(Pred).addReg(PredReg); + } else { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ)) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) + .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(Pred).addReg(PredReg); + } + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstSubReg = MI->getOperand(0).getSubReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + if (MFI.getObjectAlignment(FI) >= 16 && + getRegisterInfo().canRealignStack(MF)) { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q)) + .addReg(DstReg, + 
RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef), + DstSubReg) + .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg); + } else { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef), + DstSubReg) + .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(Pred).addReg(PredReg); + } + } } return NewMI; @@ -985,12 +1167,13 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, Opc == ARM::tMOVtgpr2gpr || Opc == ARM::tMOVgpr2tgpr) { return true; - } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) { + } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD || + Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { return true; - } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { - return false; // FIXME } + // FIXME: VMOVQQ and VMOVQQQQ? + return false; } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 7a5630e..b566271 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -200,6 +200,11 @@ public: virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; const ARMSubtarget &getSubtarget() const { return Subtarget; } + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + // Branch analysis. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -257,17 +262,20 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index bc12187..82458d2 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -259,10 +259,10 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, unsigned SubIdx) const { switch (SubIdx) { default: return 0; - case 1: - case 2: - case 3: - case 4: + case ARM::ssub_0: + case ARM::ssub_1: + case ARM::ssub_2: + case ARM::ssub_3: { // S sub-registers. if (A->getSize() == 8) { if (B == &ARM::SPR_8RegClass) @@ -273,22 +273,201 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &ARM::DPR_VFP2RegClass; } - assert(A->getSize() == 16 && "Expecting a Q register class!"); - if (B == &ARM::SPR_8RegClass) - return &ARM::QPR_8RegClass; - return &ARM::QPR_VFP2RegClass; - case 5: - case 6: - // D sub-registers. 
- if (B == &ARM::DPR_VFP2RegClass) + if (A->getSize() == 16) { + if (B == &ARM::SPR_8RegClass) + return &ARM::QPR_8RegClass; return &ARM::QPR_VFP2RegClass; - if (B == &ARM::DPR_8RegClass) - return &ARM::QPR_8RegClass; + } + + if (A->getSize() == 32) { + if (B == &ARM::SPR_8RegClass) + return 0; // Do not allow coalescing! + return &ARM::QQPR_VFP2RegClass; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + return 0; // Do not allow coalescing! + } + case ARM::dsub_0: + case ARM::dsub_1: + case ARM::dsub_2: + case ARM::dsub_3: { + // D sub-registers. + if (A->getSize() == 16) { + if (B == &ARM::DPR_VFP2RegClass) + return &ARM::QPR_VFP2RegClass; + if (B == &ARM::DPR_8RegClass) + return 0; // Do not allow coalescing! + return A; + } + + if (A->getSize() == 32) { + if (B == &ARM::DPR_VFP2RegClass) + return &ARM::QQPR_VFP2RegClass; + if (B == &ARM::DPR_8RegClass) + return 0; // Do not allow coalescing! + return A; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + if (B != &ARM::DPRRegClass) + return 0; // Do not allow coalescing! return A; } + case ARM::dsub_4: + case ARM::dsub_5: + case ARM::dsub_6: + case ARM::dsub_7: { + // D sub-registers of QQQQ registers. + if (A->getSize() == 64 && B == &ARM::DPRRegClass) + return A; + return 0; // Do not allow coalescing! + } + + case ARM::qsub_0: + case ARM::qsub_1: { + // Q sub-registers. + if (A->getSize() == 32) { + if (B == &ARM::QPR_VFP2RegClass) + return &ARM::QQPR_VFP2RegClass; + if (B == &ARM::QPR_8RegClass) + return 0; // Do not allow coalescing! + return A; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + if (B == &ARM::QPRRegClass) + return A; + return 0; // Do not allow coalescing! + } + case ARM::qsub_2: + case ARM::qsub_3: { + // Q sub-registers of QQQQ registers. + if (A->getSize() == 64 && B == &ARM::QPRRegClass) + return A; + return 0; // Do not allow coalescing! + } + } return 0; } +bool +ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl<unsigned> &SubIndices, + unsigned &NewSubIdx) const { + + unsigned Size = RC->getSize() * 8; + if (Size < 6) + return 0; + + NewSubIdx = 0; // Whole register. + unsigned NumRegs = SubIndices.size(); + if (NumRegs == 8) { + // 8 D registers -> 1 QQQQ register. + return (Size == 512 && + SubIndices[0] == ARM::dsub_0 && + SubIndices[1] == ARM::dsub_1 && + SubIndices[2] == ARM::dsub_2 && + SubIndices[3] == ARM::dsub_3 && + SubIndices[4] == ARM::dsub_4 && + SubIndices[5] == ARM::dsub_5 && + SubIndices[6] == ARM::dsub_6 && + SubIndices[7] == ARM::dsub_7); + } else if (NumRegs == 4) { + if (SubIndices[0] == ARM::qsub_0) { + // 4 Q registers -> 1 QQQQ register. + return (Size == 512 && + SubIndices[1] == ARM::qsub_1 && + SubIndices[2] == ARM::qsub_2 && + SubIndices[3] == ARM::qsub_3); + } else if (SubIndices[0] == ARM::dsub_0) { + // 4 D registers -> 1 QQ register. + if (Size >= 256 && + SubIndices[1] == ARM::dsub_1 && + SubIndices[2] == ARM::dsub_2 && + SubIndices[3] == ARM::dsub_3) { + if (Size == 512) + NewSubIdx = ARM::qqsub_0; + return true; + } + } else if (SubIndices[0] == ARM::dsub_4) { + // 4 D registers -> 1 QQ register (2nd). + if (Size == 512 && + SubIndices[1] == ARM::dsub_5 && + SubIndices[2] == ARM::dsub_6 && + SubIndices[3] == ARM::dsub_7) { + NewSubIdx = ARM::qqsub_1; + return true; + } + } else if (SubIndices[0] == ARM::ssub_0) { + // 4 S registers -> 1 Q register. 
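+        // In a 256-bit or wider class the result is qsub_0; in a 128-bit
+        // class the four S registers cover the whole register, so NewSubIdx
+        // stays zero.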
+ if (Size >= 128 && + SubIndices[1] == ARM::ssub_1 && + SubIndices[2] == ARM::ssub_2 && + SubIndices[3] == ARM::ssub_3) { + if (Size >= 256) + NewSubIdx = ARM::qsub_0; + return true; + } + } + } else if (NumRegs == 2) { + if (SubIndices[0] == ARM::qsub_0) { + // 2 Q registers -> 1 QQ register. + if (Size >= 256 && SubIndices[1] == ARM::qsub_1) { + if (Size == 512) + NewSubIdx = ARM::qqsub_0; + return true; + } + } else if (SubIndices[0] == ARM::qsub_2) { + // 2 Q registers -> 1 QQ register (2nd). + if (Size == 512 && SubIndices[1] == ARM::qsub_3) { + NewSubIdx = ARM::qqsub_1; + return true; + } + } else if (SubIndices[0] == ARM::dsub_0) { + // 2 D registers -> 1 Q register. + if (Size >= 128 && SubIndices[1] == ARM::dsub_1) { + if (Size >= 256) + NewSubIdx = ARM::qsub_0; + return true; + } + } else if (SubIndices[0] == ARM::dsub_2) { + // 2 D registers -> 1 Q register (2nd). + if (Size >= 256 && SubIndices[1] == ARM::dsub_3) { + NewSubIdx = ARM::qsub_1; + return true; + } + } else if (SubIndices[0] == ARM::dsub_4) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::dsub_5) { + NewSubIdx = ARM::qsub_2; + return true; + } + } else if (SubIndices[0] == ARM::dsub_6) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::dsub_7) { + NewSubIdx = ARM::qsub_3; + return true; + } + } else if (SubIndices[0] == ARM::ssub_0) { + // 2 S registers -> 1 D register. + if (SubIndices[1] == ARM::ssub_1) { + if (Size >= 128) + NewSubIdx = ARM::dsub_0; + return true; + } + } else if (SubIndices[0] == ARM::ssub_2) { + // 2 S registers -> 1 D register (2nd). + if (Size >= 128 && SubIndices[1] == ARM::ssub_3) { + NewSubIdx = ARM::dsub_1; + return true; + } + } + } + return false; +} + + const TargetRegisterClass * ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; @@ -481,7 +660,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((DisableFramePointerElim(MF) && MFI->hasCalls())|| + return ((DisableFramePointerElim(MF) && MFI->adjustsStack())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -509,7 +688,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (DisableFramePointerElim(MF) && MFI->hasCalls()) + if (DisableFramePointerElim(MF) && MFI->adjustsStack()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); @@ -545,24 +724,25 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { I != E; ++I) { for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { if (!I->getOperand(i).isFI()) continue; - - const TargetInstrDesc &Desc = TII.get(I->getOpcode()); - unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - if (AddrMode == ARMII::AddrMode3 || - AddrMode == ARMII::AddrModeT2_i8) - return (1 << 8) - 1; - - if (AddrMode == ARMII::AddrMode5 || - AddrMode == ARMII::AddrModeT2_i8s4) + switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { + case ARMII::AddrMode3: + case ARMII::AddrModeT2_i8: + Limit = std::min(Limit, (1U << 8) - 1); + break; + case ARMII::AddrMode5: + case ARMII::AddrModeT2_i8s4: Limit = std::min(Limit, ((1U << 8) - 1) * 4); - - if (AddrMode == ARMII::AddrModeT2_i12 && 
hasFP(MF))
-      // When the stack offset is negative, we will end up using
-      // the i8 instructions instead.
-      return (1 << 8) - 1;
-
-    if (AddrMode == ARMII::AddrMode6)
+        break;
+      case ARMII::AddrModeT2_i12:
+        if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+        break;
+      case ARMII::AddrMode5:
+      case ARMII::AddrMode6:
+        // Addressing mode 6 (load/store) instructions can't encode an
+        // immediate offset for stack references.
         return 0;
+      default:
+        break;
+      }
       break; // At most one FI per instruction
     }
   }
@@ -750,7 +930,9 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
       while (NumExtras && !UnspilledCS1GPRs.empty()) {
         unsigned Reg = UnspilledCS1GPRs.back();
         UnspilledCS1GPRs.pop_back();
-        if (!isReservedReg(MF, Reg)) {
+        if (!isReservedReg(MF, Reg) &&
+            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+             Reg == ARM::LR)) {
           Extras.push_back(Reg);
           NumExtras--;
         }
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 456c392..2c9c82d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -81,6 +81,15 @@ public:
   getMatchingSuperRegClass(const TargetRegisterClass *A,
                            const TargetRegisterClass *B, unsigned Idx) const;
+  /// canCombinedSubRegIndex - Given a register class and a list of sub-register
+  /// indices, return true if it's possible to combine the sub-register indices
+  /// into one that corresponds to a larger sub-register. Return the new sub-
+  /// register index by reference. Note that the new index may be zero if the
+  /// given sub-registers combine to form the whole register.
+  virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                      SmallVectorImpl<unsigned> &SubIndices,
+                                      unsigned &NewSubIdx) const;
+
   const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;

   std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f84f85a..f2730fc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -88,6 +88,7 @@ namespace {
     void emitWordLE(unsigned Binary);
     void emitDWordLE(uint64_t Binary);
     void emitConstPoolInstruction(const MachineInstr &MI);
+    void emitMOVi32immInstruction(const MachineInstr &MI);
     void emitMOVi2piecesInstruction(const MachineInstr &MI);
     void emitLEApcrelJTInstruction(const MachineInstr &MI);
     void emitPseudoMoveInstruction(const MachineInstr &MI);
@@ -145,6 +146,15 @@ namespace {
       return getMachineOpValue(MI, MI.getOperand(OpIdx));
     }

+    /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+    /// machine operand requires relocation, record the relocation and return zero.
+    unsigned getMovi32Value(const MachineInstr &MI, const MachineOperand &MO,
+                            unsigned Reloc);
+    unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
+                            unsigned Reloc) {
+      return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc);
+    }
+
     /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
     ///
     unsigned getShiftOp(unsigned Imm) const;
@@ -217,6 +227,31 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
  return 0;
}

+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
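+/// Used by emitMOVi32immInstruction() below and by the MOVi16 / MOVTi16
+/// handling in emitDataProcessingInstruction().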
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+                                        const MachineOperand &MO,
+                                        unsigned Reloc) {
+  assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+         && "Relocation to this function should be for movt or movw");
+
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  else if (MO.isGlobal())
+    emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+  else if (MO.isSymbol())
+    emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+  else if (MO.isMBB())
+    emitMachineBasicBlock(MO.getMBB(), Reloc);
+  else {
+#ifndef NDEBUG
+    errs() << MO;
+#endif
+    llvm_unreachable("Unsupported operand type for movw/movt");
+  }
+  return 0;
+}
+
 /// getMachineOpValue - Return binary encoding of operand. If the machine
 /// operand requires relocation, record the relocation and return zero.
 unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
@@ -438,6 +473,42 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
   }
 }

+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+  const MachineOperand &MO0 = MI.getOperand(0);
+  const MachineOperand &MO1 = MI.getOperand(1);
+
+  // Emit the 'movw' instruction.
+  unsigned Binary = 0x30 << 20;  // mov: Insts{27-20} = 0b00110000
+
+  unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode imm16 as imm4:imm12
+  Binary |= Lo16 & 0xFFF;               // Insts{11-0} = imm12
+  Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+  emitWordLE(Binary);
+
+  unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+  // Emit the 'movt' instruction.
+  Binary = 0x34 << 20;  // movt: Insts{27-20} = 0b00110100
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode imm16 as imm4:imm12, same as movw above.
+  Binary |= Hi16 & 0xFFF;
+  Binary |= ((Hi16 >> 12) & 0xF) << 16;
+  emitWordLE(Binary);
+}
+
 void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
   const MachineOperand &MO0 = MI.getOperand(0);
   const MachineOperand &MO1 = MI.getOperand(1);
@@ -557,7 +628,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
   switch (Opcode) {
   default:
     llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
-  // FIXME: Add support for MOVimm32.
   case TargetOpcode::INLINEASM: {
     // We allow inline assembler nodes with empty bodies - they can
     // implicitly define registers, which is ok for JIT.
@@ -604,6 +674,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
     emitMiscLoadStoreInstruction(MI, ARM::PC);
     break;
   }
+
+  case ARM::MOVi32imm:
+    emitMOVi32immInstruction(MI);
+    break;
+
   case ARM::MOVi2pieces:
     // Two instructions to materialize a constant.
     emitMOVi2piecesInstruction(MI);
@@ -706,10 +781,6 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
                                                    unsigned ImplicitRn) {
   const TargetInstrDesc &TID = MI.getDesc();

-  if (TID.Opcode == ARM::BFC) {
-    report_fatal_error("ARMv6t2 JIT is not yet supported.");
-  }
-
   // Part of binary is determined by TableGen.
 unsigned Binary = getBinaryCodeForInstr(MI);
@@ -729,6 +800,45 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
     Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
                << ARMII::RegRdShift);

+  if (TID.Opcode == ARM::MOVi16) {
+    // Get immediate from MI.
+    unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+                                   ARM::reloc_arm_movw);
+    // Encode imm which is the same as in emitMOVi32immInstruction().
+    Binary |= Lo16 & 0xFFF;
+    Binary |= ((Lo16 >> 12) & 0xF) << 16;
+    emitWordLE(Binary);
+    return;
+  } else if (TID.Opcode == ARM::MOVTi16) {
+    unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+                                    ARM::reloc_arm_movt) >> 16);
+    Binary |= Hi16 & 0xFFF;
+    Binary |= ((Hi16 >> 12) & 0xF) << 16;
+    emitWordLE(Binary);
+    return;
+  } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+    uint32_t v = ~MI.getOperand(2).getImm();
+    int32_t lsb = CountTrailingZeros_32(v);
+    int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+    // Instr{20-16} = msb, Instr{11-7} = lsb
+    Binary |= (msb & 0x1F) << 16;
+    Binary |= (lsb & 0x1F) << 7;
+    emitWordLE(Binary);
+    return;
+  } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+    // Encode Rn in Instr{3-0}
+    Binary |= getMachineOpValue(MI, OpIdx++);
+
+    uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+    uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+    // Instr{20-16} = widthm1, Instr{11-7} = lsb
+    Binary |= (widthm1 & 0x1F) << 16;
+    Binary |= (lsb & 0x1F) << 7;
+    emitWordLE(Binary);
+    return;
+  }
+
   // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
   if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
     ++OpIdx;
@@ -1366,18 +1476,66 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
       break;
     ++NumRegs;
   }
-  Binary |= NumRegs * 2;
+  // Bit 8 is set if <list> holds consecutive 64-bit registers (e.g., D0), in
+  // which case the count field holds the number of 32-bit words transferred;
+  // otherwise it is clear and <list> holds 32-bit registers.
+  if (Binary & 0x100)
+    Binary |= NumRegs * 2;
+  else
+    Binary |= NumRegs;

   emitWordLE(Binary);
 }

 void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
+  unsigned Opcode = MI.getDesc().Opcode;
   // Part of binary is determined by TableGen.
   unsigned Binary = getBinaryCodeForInstr(MI);

   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;

+  switch (Opcode) {
+  default:
+    llvm_unreachable("ARMCodeEmitter::emitMiscInstruction");
+
+  case ARM::FMSTAT:
+    // No further encoding needed.
+    break;
+
+  case ARM::VMRS:
+  case ARM::VMSR: {
+    const MachineOperand &MO0 = MI.getOperand(0);
+    // Encode Rt.
+    Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg())
+                << ARMII::RegRdShift;
+    break;
+  }
+
+  case ARM::FCONSTD:
+  case ARM::FCONSTS: {
+    // Encode Dd / Sd.
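+    // FCONSTS/FCONSTD are vmov.f32/.f64 with a VFP modified immediate; for
+    // example, #1.0f (bit pattern 0x3F800000) encodes below as ModifiedImm
+    // 0x70, i.e. abcd = 0b0111, efgh = 0b0000.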
+ Binary |= encodeVFPRd(MI, 0); + + // Encode imm., Table A7-18 VFP modified immediate constants + const MachineOperand &MO1 = MI.getOperand(1); + unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF() + .bitcastToAPInt().getHiBits(32).getLimitedValue()); + unsigned ModifiedImm; + + if(Opcode == ARM::FCONSTS) + ModifiedImm = (Imm & 0x80000000) >> 24 | // a + (Imm & 0x03F80000) >> 19; // bcdefgh + else // Opcode == ARM::FCONSTD + ModifiedImm = (Imm & 0x80000000) >> 24 | // a + (Imm & 0x007F0000) >> 16; // bcdefgh + + // Insts{19-16} = abcd, Insts{3-0} = efgh + Binary |= ((ModifiedImm & 0xF0) >> 4) << 16; + Binary |= (ModifiedImm & 0xF); + break; + } + } + emitWordLE(Binary); } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 845d088..c87f5d7 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -29,6 +29,7 @@ namespace { ARMExpandPseudo() : MachineFunctionPass(&ID) {} const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -37,11 +38,31 @@ namespace { } private: + void TransferImpOps(MachineInstr &OldMI, + MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMBB(MachineBasicBlock &MBB); }; char ARMExpandPseudo::ID = 0; } +/// TransferImpOps - Transfer implicit operands on the pseudo instruction to +/// the instructions created from the expansion. +void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, + MachineInstrBuilder &UseMI, + MachineInstrBuilder &DefMI) { + const TargetInstrDesc &Desc = OldMI.getDesc(); + for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = OldMI.getOperand(i); + assert(MO.isReg() && MO.getReg()); + if (MO.isUse()) + UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill())); + else + DefMI.addReg(MO.getReg(), + getDefRegState(true) | getDeadRegState(MO.isDead())); + } +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -58,52 +79,82 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? 
ARM::tLDRpci : ARM::t2LDRpci; unsigned DstReg = MI.getOperand(0).getReg(); - if (!MI.getOperand(0).isDead()) { - MachineInstr *NewMI = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(NewLdOpc), DstReg) - .addOperand(MI.getOperand(1))); - NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) - .addReg(DstReg, getDefRegState(true)) - .addReg(DstReg) - .addOperand(MI.getOperand(2)); - } + bool DstIsDead = MI.getOperand(0).isDead(); + MachineInstrBuilder MIB1 = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(NewLdOpc), DstReg) + .addOperand(MI.getOperand(1))); + (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::tPICADD)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addOperand(MI.getOperand(2)); + TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); Modified = true; break; } + case ARM::t2MOVi32imm: { + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); - if (!MI.getOperand(0).isDead()) { - const MachineOperand &MO = MI.getOperand(1); - MachineInstrBuilder LO16, HI16; - - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), - DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) - .addReg(DstReg, getDefRegState(true)).addReg(DstReg); - - if (MO.isImm()) { - unsigned Imm = MO.getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - LO16 = LO16.addImm(Lo16); - HI16 = HI16.addImm(Hi16); - } else { - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); - // FIXME: What's about memoperands? 
- } - AddDefaultPred(LO16); - AddDefaultPred(HI16); + bool DstIsDead = MI.getOperand(0).isDead(); + const MachineOperand &MO = MI.getOperand(1); + MachineInstrBuilder LO16, HI16; + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), + DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); } + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + Modified = true; + break; + } + + case ARM::VMOVQQ: { + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0); + unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1); + unsigned SrcReg = MI.getOperand(1).getReg(); + bool SrcIsKill = MI.getOperand(1).isKill(); + unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0); + unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1); + MachineInstrBuilder Even = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::VMOVQ)) + .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); + MachineInstrBuilder Odd = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::VMOVQ)) + .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); + TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); Modified = true; } - // FIXME: expand t2MOVi32imm } MBBI = NMBBI; } @@ -113,6 +164,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 616942c..9baef6b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -37,7 +37,8 @@ using namespace llvm; static cl::opt<bool> UseRegSeq("neon-reg-sequence", cl::Hidden, - cl::desc("Use reg_sequence to model ld / st of multiple neon regs")); + cl::desc("Use reg_sequence to model ld / st of multiple neon regs"), + cl::init(true)); //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine @@ -164,15 +165,34 @@ private: ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); + SDNode *SelectConcatVector(SDNode *N); + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); - /// PairDRegs - Insert a pair of double registers into an implicit def to - /// form a quad register. 
+  /// PairDRegs - Form a quad register from a pair of D registers.
+  ///
   SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+
+  /// PairQRegs - Form a quad register pair from a pair of Q registers.
+  ///
+  SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+  /// QuadDRegs - Form a quad register pair from a quad of D registers.
+  ///
+  SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+  /// QuadQRegs - Form 4 consecutive Q registers.
+  ///
+  SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+  /// OctoDRegs - Form 8 consecutive D registers.
+  ///
+  SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
+                    SDValue V4, SDValue V5, SDValue V6, SDValue V7);
 };
}
@@ -940,13 +960,13 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
   return NULL;
}

-/// PairDRegs - Insert a pair of double registers into an implicit def to
-/// form a quad register.
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
 SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
-  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
-  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
-  if (UseRegSeq) {
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  if (llvm::ModelWithRegSequence()) {
     const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
   }
@@ -958,6 +978,62 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
                                 VT, SDValue(Pair, 0), V1, SubReg1);
}

+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// OctoDRegs - Form 8 consecutive D registers.
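+/// Used, for example, when a vld3/vld4 of Q registers is split into even/odd
+/// D-register operations.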
+/// +SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3, + SDValue V4, SDValue V5, + SDValue V6, SDValue V7) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); + SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32); + SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32); + SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32); + SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32); + const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3, + V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16); +} + /// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type /// for a 64-bit subregister of the vector. static EVT GetNEONSubregVT(EVT VT) { @@ -1011,7 +1087,34 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + if (!llvm::ModelWithRegSequence() || NumVecs < 2) + return VLd; + + SDValue RegSeq; + SDValue V0 = SDValue(VLd, 0); + SDValue V1 = SDValue(VLd, 1); + + // Form a REG_SEQUENCE to force register allocation. + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + else { + SDValue V2 = SDValue(VLd, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLd, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), D); + } + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs)); + return NULL; } EVT RegVT = GetNEONSubregVT(VT); @@ -1026,9 +1129,24 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, Chain = SDValue(VLd, 2 * NumVecs); // Combine the even and odd subregs to produce the result. 
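+  // When modeling with REG_SEQUENCE, tie the 2*NumVecs D-register results
+  // into one sequence so they are allocated as a single wide register.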
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + if (llvm::ModelWithRegSequence()) { + if (NumVecs == 1) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); + ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); + } else { + SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, + SDValue(VLd, 0), SDValue(VLd, 1), + SDValue(VLd, 2), SDValue(VLd, 3)), 0); + SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); + SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); + ReplaceUses(SDValue(N, 0), Q0); + ReplaceUses(SDValue(N, 1), Q1); + } + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } } } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1051,10 +1169,37 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); - // Combine the even and odd subregs to produce the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + if (llvm::ModelWithRegSequence()) { + SDValue V0 = SDValue(VLdA, 0); + SDValue V1 = SDValue(VLdB, 0); + SDValue V2 = SDValue(VLdA, 1); + SDValue V3 = SDValue(VLdB, 1); + SDValue V4 = SDValue(VLdA, 2); + SDValue V5 = SDValue(VLdB, 2); + SDValue V6 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), + 0) + : SDValue(VLdA, 3); + SDValue V7 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), + 0) + : SDValue(VLdB, 3); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, + V4, V5, V6, V7), 0); + + // Extract out the 3 / 4 Q registers. + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), Q); + } + } else { + // Combine the even and odd subregs to produce the result. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } } } ReplaceUses(SDValue(N, NumVecs), Chain); @@ -1102,12 +1247,43 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Align); if (is64BitVector) { - unsigned Opc = DOpcodes[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + if (llvm::ModelWithRegSequence() && NumVecs >= 2) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + + // Form a REG_SEQUENCE to force register allocation. + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + else { + SDValue V2 = N->getOperand(2+3); + // If it's a vld3, form a quad D-register and leave the last part as + // an undef. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. 
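+      // (The REG_SEQUENCE above pins V0..V3 to consecutive D registers,
+      // which the VSTn register lists require.)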
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, + RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, + RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, + RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, + RegSeq)); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + } Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); + unsigned Opc = DOpcodes[OpcodeIndex]; return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } @@ -1116,48 +1292,114 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VST1 and VST2, // storing pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(Vec+3))); + if (llvm::ModelWithRegSequence() && NumVecs == 2) { + // First extract the pair of Q registers. + SDValue Q0 = N->getOperand(3); + SDValue Q1 = N->getOperand(4); + + // Form a QQ register. + SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + QQ)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + QQ)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT, + QQ)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT, + QQ)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3))); + } + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), + 5 + 2 * NumVecs); } - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), - 5 + 2 * NumVecs); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. + if (llvm::ModelWithRegSequence()) { + // Form the QQQQ REG_SEQUENCE. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3)); + V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3)); + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); - Ops.push_back(Reg0); // post-access address offset + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); - // Store the even subregs. 
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - - // Store the odd subregs. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(Vec+3)); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; + // Store the even D registers. + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + Ops.push_back(Reg0); // post-access address offset + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, + RegVT, RegSeq)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd D registers. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, + RegVT, RegSeq); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } else { + Ops.push_back(Reg0); // post-access address offset + + // Store the even subregs. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd subregs. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3)); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1180,11 +1422,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, // Quad registers are handled by load/store of subregs. Find the subreg info. unsigned NumElts = 0; int SubregIdx = 0; + bool Even = false; EVT RegVT = VT; if (!is64BitVector) { RegVT = GetNEONSubregVT(VT); NumElts = RegVT.getVectorNumElements(); - SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1; + SubregIdx = (Lane < NumElts) ? 
ARM::dsub_0 : ARM::dsub_1; + Even = Lane < NumElts; } unsigned OpcodeIndex; @@ -1211,8 +1455,35 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = 0; if (is64BitVector) { Opc = DOpcodes[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + if (llvm::ModelWithRegSequence()) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, + RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, + RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, + RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, + RegSeq)); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + } } else { // Check if this is loading the even or odd subreg of a Q register. if (Lane < NumElts) { @@ -1221,10 +1492,32 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, Lane -= NumElts; Opc = QOpcodes1[OpcodeIndex]; } - // Extract the subregs of the input vector. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, - N->getOperand(Vec+3))); + + if (llvm::ModelWithRegSequence()) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + } + + // Extract the subregs of the input vector. + unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, + RegSeq)); + } else { + // Extract the subregs of the input vector. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(Vec+3))); + } } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); @@ -1236,8 +1529,60 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); - SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); + + if (llvm::ModelWithRegSequence()) { + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + if (is64BitVector) { + SDValue V0 = SDValue(VLdLn, 0); + SDValue V1 = SDValue(VLdLn, 1); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = SDValue(VLdLn, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLdLn, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + } else { + // For 128-bit vectors, take the 64-bit results of the load and insert them + // as subregs into the result. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + if (Even) { + V[i] = SDValue(VLdLn, Vec); + V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + } else { + V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + V[i+1] = SDValue(VLdLn, Vec); + } + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + + if (NumVecs == 2) + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); + else + RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); + } + + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); + return NULL; + } + // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; @@ -1481,6 +1826,21 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { + // The only time a CONCAT_VECTORS operation can have legal types is when + // two 64-bit vectors are concatenated to a 128-bit vector. + EVT VT = N->getValueType(0); + if (!VT.is128BitVector() || N->getNumOperands() != 2) + llvm_unreachable("unexpected CONCAT_VECTORS"); + DebugLoc dl = N->getDebugLoc(); + SDValue V0 = N->getOperand(0); + SDValue V1 = N->getOperand(1); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); @@ -1695,8 +2055,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; - return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other, - Ops, 5); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl, + MVT::v2f64, MVT::Other, Ops, 5); + cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); + return Ret; } // Other cases are autogenerated. 
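        // (The setMemRefs call above hands the original load's
        // MachineMemOperand to the hand-built VLDMQ node; without it,
        // post-isel passes such as scheduling and alias analysis would see
        // a machine node with no memory information.)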
break; @@ -1712,7 +2076,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), N->getOperand(2), AM5Opc, Pred, PredReg, Chain }; - return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); + cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); + return Ret; } // Other cases are autogenerated. break; @@ -1971,7 +2339,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } + break; } + + case ISD::CONCAT_VECTORS: + return SelectConcatVector(N); } return SelectCode(N); @@ -1995,3 +2367,9 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new ARMDAGToDAGISel(TM, OptLevel); } + +/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model +/// operations involving sub-registers. +bool llvm::ModelWithRegSequence() { + return UseRegSeq; +} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d3842a6..b8126a3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -94,7 +94,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, } setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); + if (llvm::ModelWithRegSequence()) + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); + else + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); @@ -360,8 +363,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i32, Expand); // These are expanded into libcalls. - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); + if (!Subtarget->hasDivide()) { + // v7M has a hardware divider + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); @@ -373,6 +379,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + setOperationAction(ISD::TRAP, MVT::Other, Legal); + // Use the default implementation. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -387,7 +395,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { + // If the subtarget does not have extract instructions, sign_extend_inreg + // needs to be expanded. Extract is available in ARM mode on v6 and up, + // and on most Thumb2 implementations. 
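  // For example, an i8 sign_extend_inreg then expands to a shift pair,
  //   (sra (shl x, 24), 24)
  // instead of a single sxtb.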
+ if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops()) + || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } @@ -400,6 +412,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -451,9 +465,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::MUL); setStackPointerRegisterToSaveRestore(ARM::SP); - setSchedulingPreference(SchedulingForRegPressure); + + if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + setSchedulingPreference(Sched::RegPressure); + else + setSchedulingPreference(Sched::Hybrid); // FIXME: If-converter should use instruction latency to determine // profitability rather than relying on fixed limits. @@ -567,11 +586,35 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +/// getRegClassFor - Return the register class that should be used for the +/// specified value type. +TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { + // Map v4i64 to QQ registers but do not make the type legal. Similarly map + // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to + // load / store 4 to 8 consecutive D registers. + if (Subtarget->hasNEON()) { + if (VT == MVT::v4i64) + return ARM::QQPRRegisterClass; + else if (VT == MVT::v8i64) + return ARM::QQQQPRRegisterClass; + } + return TargetLowering::getRegClassFor(VT); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1; } +Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT.isFloatingPoint() || VT.isVector()) + return Sched::Latency; + } + return Sched::RegPressure; +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -1507,6 +1550,23 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, } SDValue +ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Val = Subtarget->isThumb() ? 
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : + DAG.getConstant(0, MVT::i32); + return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), + Op.getOperand(1), Val); +} + +SDValue +ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1), DAG.getConstant(0, MVT::i32)); +} + +SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { @@ -1545,12 +1605,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } - case Intrinsic::eh_sjlj_setjmp: - SDValue Val = Subtarget->isThumb() ? - DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : - DAG.getConstant(0, MVT::i32); - return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1), - Val); } } @@ -1652,7 +1706,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, RC = ARM::GPRRegisterClass; // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); SDValue ArgValue2; @@ -2092,9 +2146,31 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } +SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + NULL, 0, false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -2107,116 +2183,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } -SDValue -ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { - // Do repeated 4-byte loads and stores. To be improved. - // This requires 4-byte alignment. - if ((Align & 3) != 0) - return SDValue(); - // This requires the copy size to be a constant, preferrably - // within a subtarget-specific limit. 
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) - return SDValue(); - - unsigned BytesLeft = SizeVal & 3; - unsigned NumMemOps = SizeVal >> 2; - unsigned EmittedNumMemOps = 0; - EVT VT = MVT::i32; - unsigned VTSize = 4; - unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; - uint64_t SrcOff = 0, DstOff = 0; - - // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the - // same number of stores. The loads and stores will get combined into - // ldm/stm later on. - while (EmittedNumMemOps < NumMemOps) { - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); - TFOps[i] = Loads[i].getValue(1); - SrcOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, isVolatile, false, 0); - DstOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - EmittedNumMemOps += i; - } - - if (BytesLeft == 0) - return Chain; - - // Issue loads / stores for the trailing (1 - 3) bytes. - unsigned BytesLeftSave = BytesLeft; - i = 0; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, false, false, 0); - TFOps[i] = Loads[i].getValue(1); - ++i; - SrcOff += VTSize; - BytesLeft -= VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - i = 0; - BytesLeft = BytesLeftSave; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, false, false, 0); - ++i; - DstOff += VTSize; - BytesLeft -= VTSize; - } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); -} - /// ExpandBIT_CONVERT - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -2434,9 +2400,9 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, MVT::i32)); // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. 
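(Annotation on the EmitTargetCodeForMemcpy deletion above: ARM's custom inline-memcpy lowering is removed in favor of the generic expansion. For reference, the strategy the deleted DAG code implemented can be re-expressed as a standalone C++ sketch: words are copied in batches of up to six, each batch of loads completed before the matching stores so the pairs can later combine into ldm/stm, and the last 1-3 bytes are finished with 2- and 1-byte accesses. The alignment and constant-size guards are elided, and inline_memcpy_sketch is a made-up name; this is an illustration under those assumptions, not LLVM code.)

#include <cstdint>
#include <cstdio>
#include <cstring>

// Re-expression of the deleted expansion: copy words in batches of up to
// MAX_LOADS_IN_LDM (the DAG version put a TokenFactor between each batch of
// loads and the matching stores so they could later combine into ldm/stm),
// then finish the trailing 1-3 bytes with 2- and 1-byte copies.
static void inline_memcpy_sketch(std::uint8_t *dst, const std::uint8_t *src,
                                 std::size_t n) {
  const std::size_t MAX_LOADS_IN_LDM = 6;
  std::size_t words = n >> 2, done = 0;
  std::uint32_t batch[MAX_LOADS_IN_LDM];
  while (done < words) {
    std::size_t cnt = 0;
    for (; cnt < MAX_LOADS_IN_LDM && done + cnt < words; ++cnt)  // "loads"
      std::memcpy(&batch[cnt], src + 4 * (done + cnt), 4);
    for (std::size_t i = 0; i < cnt; ++i)                        // "stores"
      std::memcpy(dst + 4 * (done + i), &batch[i], 4);
    done += cnt;
  }
  std::size_t off = 4 * words, tail = n & 3;
  while (tail) {                  // 2-byte then 1-byte trailing accesses
    std::size_t step = tail >= 2 ? 2 : 1;
    std::memcpy(dst + off, src + off, step);
    off += step;
    tail -= step;
  }
}

int main() {
  const char msg[] = "inline memcpy sketch!";  // 22 bytes: 5 words + 2 tail
  char out[sizeof msg] = {};
  inline_memcpy_sketch(reinterpret_cast<std::uint8_t *>(out),
                       reinterpret_cast<const std::uint8_t *>(msg),
                       sizeof msg);
  std::puts(out);
  return 0;
}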
@@ -2879,21 +2845,60 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } } - // If there are only 2 elements in a 128-bit vector, insert them into an - // undef vector. This handles the common case for 128-bit vector argument - // passing, where the insertions should be translated to subreg accesses - // with no real instructions. - if (VT.is128BitVector() && Op.getNumOperands() == 2) { - SDValue Val = DAG.getUNDEF(VT); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - if (Op0.getOpcode() != ISD::UNDEF) - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0, - DAG.getIntPtrConstant(0)); - if (Op1.getOpcode() != ISD::UNDEF) - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1, - DAG.getIntPtrConstant(1)); - return Val; + // Scan through the operands to see if only one value is used. + unsigned NumElts = VT.getVectorNumElements(); + bool isOnlyLowElement = true; + bool usesOnlyOneValue = true; + bool isConstant = true; + SDValue Value; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (i > 0) + isOnlyLowElement = false; + if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) + isConstant = false; + + if (!Value.getNode()) + Value = V; + else if (V != Value) + usesOnlyOneValue = false; + } + + if (!Value.getNode()) + return DAG.getUNDEF(VT); + + if (isOnlyLowElement) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); + + // If all elements are constants, fall back to the default expansion, which + // will generate a load from the constant pool. + if (isConstant) + return SDValue(); + + // Use VDUP for non-constant splats. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (usesOnlyOneValue && EltSize <= 32) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + + // Vectors with 32- or 64-bit elements can be built by directly assigning + // the subregisters. + if (EltSize >= 32) { + // Do the expansion with floating-point types, since that is what the VFP + // registers are defined to use, and since i64 is not legal. + EVT EltVT = EVT::getFloatingPointVT(EltSize); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); + SDValue Val = DAG.getUNDEF(VecVT); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Elt = Op.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) + continue; + Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt, + DAG.getConstant(i, MVT::i32)); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } return SDValue(); @@ -3083,8 +3088,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. - if (VT.getVectorNumElements() == 4 && - (VT.is128BitVector() || VT.is64BitVector())) { + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts == 4) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (ShuffleMask[i] < 0) @@ -3096,7 +3101,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Compute the index in the perfect shuffle table. 
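      // Each PFIndexes[i] is a lane selector in [0,8], with 8 standing for
      // undef, so the four selectors pack into one base-9 number:
      //   PFTableIndex = ((PFIndexes[0]*9 + PFIndexes[1])*9
      //                   + PFIndexes[2])*9 + PFIndexes[3]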
unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); @@ -3104,6 +3108,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } + // Implement shuffles with 32- or 64-bit elements as subreg copies. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize >= 32) { + // Do the expansion with floating-point types, since that is what the VFP + // registers are defined to use, and since i64 is not legal. + EVT EltVT = EVT::getFloatingPointVT(EltSize); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); + V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); + SDValue Val = DAG.getUNDEF(VecVT); + for (unsigned i = 0; i < NumElts; ++i) { + if (ShuffleMask[i] < 0) + continue; + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + ShuffleMask[i] < (int)NumElts ? V1 : V2, + DAG.getConstant(ShuffleMask[i] & (NumElts-1), + MVT::i32)); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, + Elt, DAG.getConstant(i, MVT::i32)); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); + } + return SDValue(); } @@ -3158,9 +3185,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); - case ISD::RETURNADDR: break; + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); + case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); @@ -3667,6 +3696,62 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } +static SDValue PerformMULCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + + if (Subtarget->isThumb1Only()) + return SDValue(); + + if (DAG.getMachineFunction(). + getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return SDValue(); + + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C) + return SDValue(); + + uint64_t MulAmt = C->getZExtValue(); + unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + ShiftAmt = ShiftAmt & (32 - 1); + SDValue V = N->getOperand(0); + DebugLoc DL = N->getDebugLoc(); + + SDValue Res; + MulAmt >>= ShiftAmt; + if (isPowerOf2_32(MulAmt - 1)) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(Log2_32(MulAmt-1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmt + 1)) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(Log2_32(MulAmt+1), + MVT::i32)), + V); + } else + return SDValue(); + + if (ShiftAmt != 0) + Res = DAG.getNode(ISD::SHL, DL, VT, Res, + DAG.getConstant(ShiftAmt, MVT::i32)); + + // Do not add new nodes to DAG combiner worklist. 
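  // (Passing AddTo = false below keeps the replacement nodes off the
  // combiner worklist, so the freshly built shl/add/sub chain is not
  // immediately revisited.)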
+ DCI.CombineTo(N, Res, false); + return SDValue(); +} + /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, @@ -4053,6 +4138,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); + case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: @@ -4432,9 +4518,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); } else return false; @@ -4442,13 +4530,25 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, - isInc, DAG); + isInc, DAG); else isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; + if (Ptr != Base) { + // Swap base ptr and offset to catch more post-index load / store when + // it's legal. In Thumb2 mode, offset must be an immediate. + if (Ptr == Offset && Op->getOpcode() == ISD::ADD && + !Subtarget->isThumb2()) + std::swap(Base, Offset); + + // Post-indexed load / store update the base pointer. + if (Ptr != Base) + return false; + } + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; return true; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index d8a230f..9c7517c 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -236,13 +236,19 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - virtual const ARMSubtarget* getSubtarget() const { + const ARMSubtarget* getSubtarget() const { return Subtarget; } + /// getRegClassFor - Return the register class that should be used for the + /// specified value type. + virtual TargetRegisterClass *getRegClassFor(EVT VT) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; + bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -281,7 +287,8 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const; - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -296,20 +303,12 @@ namespace llvm { SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index b466d0d..d487df1 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -228,7 +228,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin, "", itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; } @@ -240,7 +240,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } @@ -252,7 +252,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = !strconcat(opc, asm); + let AsmString = !strconcat(opc, asm); let Pattern = pattern; let isPredicable = 0; list<Predicate> Predicates = [IsARM]; @@ -268,7 +268,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } @@ -280,7 +280,7 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = 
[IsARM]; } @@ -959,7 +959,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb]; } @@ -995,7 +995,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; } @@ -1140,7 +1140,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; } @@ -1152,7 +1152,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; } @@ -1163,7 +1163,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; } @@ -1280,7 +1280,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; } @@ -1292,7 +1292,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ce5f2f8..f3156d9 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -46,6 +46,7 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; +def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; @@ -100,7 +101,10 @@ def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; -def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; +def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", + SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; +def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", + SDT_ARMEH_SJLJ_Longjmp, 
[SDNPHasChain]>; def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, [SDNPHasChain]>; @@ -128,6 +132,8 @@ def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; +def HasDivide : Predicate<"Subtarget->hasDivide()">; +def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">; @@ -654,12 +660,12 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, - "@ ADJCALLSTACKUP $amt1", + "${:comment} ADJCALLSTACKUP $amt1", [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, - "@ ADJCALLSTACKDOWN $amt", + "${:comment} ADJCALLSTACKDOWN $amt", [(ARMcallseq_start timm:$amt)]>; } @@ -789,8 +795,11 @@ def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", } // A5.4 Permanently UNDEFINED instructions. -def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", - [/* For disassembly only; pattern left blank */]>, +// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to +// binutils +let isBarrier = 1, isTerminator = 1 in +def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, + ".long 0xe7ffdefe ${:comment} trap", [(trap)]>, Requires<[IsARM]> { let Inst{27-25} = 0b011; let Inst{24-20} = 0b11111; @@ -843,25 +852,19 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), // LEApcrel - Load a pc-relative address into a register without offending the // assembler. +let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, IIC_iALUi, - !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), - []>; + "adr$p\t$dst, #$label", []>; def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), - Pseudo, IIC_iALUi, - !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, " - "(${label}_${id}-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), - []> { + Pseudo, IIC_iALUi, + "adr$p\t$dst, #${label}_${id}", []> { let Inst{25} = 1; } +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -1134,7 +1137,8 @@ def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
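// Constant-pool entries are read-only, so this load can be marked
// neverHasSideEffects and isReMaterializable: the register allocator may
// re-emit it instead of spilling the loaded value.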
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, + isReMaterializable = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; @@ -1156,7 +1160,7 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrsb", "\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrd", "\t$dst1, $addr", @@ -1215,7 +1219,7 @@ def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>, Requires<[IsARM, HasV5TE]>; -} +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. @@ -1264,7 +1268,7 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStorer, "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; @@ -1356,7 +1360,7 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb), // Load / store multiple Instructions. // -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, LdStMulFrm, IIC_iLoadm, @@ -1367,9 +1371,9 @@ def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, IndexModeUpd, LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, LdStMulFrm, IIC_iStorem, @@ -1380,7 +1384,7 @@ def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, IndexModeUpd, LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>; -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq //===----------------------------------------------------------------------===// // Move Instructions. @@ -2198,6 +2202,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. 
:( +let neverHasSideEffects = 1 in { def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -2221,6 +2226,7 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), RegConstraint<"$false = $dst">, UnaryDP { let Inst{25} = 1; } +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -2528,12 +2534,12 @@ let Defs = def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" "add\t$val, pc, #8\n\t" "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" - "mov\tr0, #1 @ eh_setjmp end", "", + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } @@ -2543,16 +2549,30 @@ let Defs = def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" "add\t$val, pc, #8\n\t" "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" - "mov\tr0, #1 @ eh_setjmp end", "", + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } +// FIXME: Non-Darwin version(s) +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, + Defs = [ R7, LR, SP ] in { +def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "ldr\tsp, [$src, #8]\n\t" + "ldr\t$scratch, [$src, #4]\n\t" + "ldr\tr7, [$src]\n\t" + "bx\t$scratch", "", + [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsARM, IsDarwin]>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d5ce2b8..197ec16 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -115,7 +115,7 @@ def h64imm : Operand<i64> { // NEON load / store instructions //===----------------------------------------------------------------------===// -let mayLoad = 1 in { +let mayLoad = 1, neverHasSideEffects = 1 in { // Use vldmia to load a Q register as a D register pair. // This is equivalent to VLDMD except that it has a Q register operand // instead of a pair of D registers. @@ -123,11 +123,6 @@ def VLDMQ : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), IndexModeNone, IIC_fpLoadm, "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; -def VLDMQ_UPD - : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p), - IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}", - "$addr.base = $wb", []>; // Use vld1 to load a Q register as a D register pair. // This alternative to VLDMQ allows an alignment to be specified. 
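// vldm has no alignment field in its encoding, while vld1.64 can encode a
// 64- or 128-bit alignment on the address, so VLD1q is preferred when the
// alignment of the access is known.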
@@ -135,13 +130,9 @@ def VLDMQ_UPD def VLD1q : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; -def VLD1q_UPD - : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64", - "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>; -} // mayLoad = 1 +} // mayLoad = 1, neverHasSideEffects = 1 -let mayStore = 1 in { +let mayStore = 1, neverHasSideEffects = 1 in { // Use vstmia to store a Q register as a D register pair. // This is equivalent to VSTMD except that it has a Q register operand // instead of a pair of D registers. @@ -149,11 +140,6 @@ def VSTMQ : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), IndexModeNone, IIC_fpStorem, "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; -def VSTMQ_UPD - : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p), - IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}", - "$addr.base = $wb", []>; // Use vst1 to store a Q register as a D register pair. // This alternative to VSTMQ allows an alignment to be specified. @@ -161,14 +147,9 @@ def VSTMQ_UPD def VST1q : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; -def VST1q_UPD - : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QPR:$src), - IIC_VST, "vst1", "64", "{$src:dregpair}, $addr$offset", - "$addr.addr = $wb", []>; -} // mayStore = 1 +} // mayStore = 1, neverHasSideEffects = 1 -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> @@ -492,9 +473,9 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; // VLD3DUP : Vector Load (single 3-element structure to all lanes) // VLD4DUP : Vector Load (single 4-element structure to all lanes) // FIXME: Not yet implemented. -} // mayLoad = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // VST1 : Vector Store (multiple single elements) class VST1D<bits<4> op7_4, string Dt> @@ -807,7 +788,7 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">; def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">; def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; -} // mayStore = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 //===----------------------------------------------------------------------===// @@ -815,27 +796,32 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; //===----------------------------------------------------------------------===// // Extract D sub-registers of Q registers. 
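// The xforms below now derive the index from ARM::dsub_0 instead of the old
// hard-coded values (5, 6, ...); the accompanying asserts check the
// contiguity of dsub_0..dsub_7 that this arithmetic depends on.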
-// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6) def DSubReg_i8_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); }]>; def DSubReg_i16_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); }]>; def DSubReg_i32_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); }]>; def DSubReg_f64_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); }]>; def DSubReg_f64_other_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()), + MVT::i32); }]>; // Extract S sub-registers of Q/D registers. -// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.) def SSubReg_f32_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32); + assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32); }]>; // Translate lane numbers from Q registers to D subregs. @@ -2829,11 +2815,21 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // VMOV : Vector Move (Register) +let neverHasSideEffects = 1 in { def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; +// Pseudo vector move instructions for QQ and QQQQ registers. This should +// be expanded after register allocation is completed. +def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), + NoItinerary, "${:comment} vmov\t$dst, $src", []>; + +def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), + NoItinerary, "${:comment} vmov\t$dst, $src", []>; +} // neverHasSideEffects + // VMOV : Vector Move (Immediate) // VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. @@ -2871,6 +2867,7 @@ def vmovImm64 : PatLeaf<(build_vector), [{ // Note: Some of the cmode bits in the following VMOV instructions need to // be encoded based on the immed values. 
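// These immediate forms read no registers and have no side effects, so they
// are safe to rematerialize; re-emitting a vmov.iN is cheaper than spilling
// and reloading a vector constant.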
+let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", @@ -2906,6 +2903,7 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; +} // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) @@ -3018,11 +3016,11 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; def : Pat<(v2f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>; + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>; + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; def : Pat<(v4f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>; + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v8i8 (scalar_to_vector GPR:$src)), (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; @@ -3034,15 +3032,15 @@ def : Pat<(v2i32 (scalar_to_vector GPR:$src)), def : Pat<(v16i8 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; def : Pat<(v8i16 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; def : Pat<(v4i32 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; // VDUP : Vector Duplicate (from ARM core register to all elements) @@ -3376,27 +3374,27 @@ def VTBX4 class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> : NEONFPPat<(ResTy (OpNode SPR:$a)), (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0))), - arm_ssubreg_0)>; + SPR:$a, ssub_0))), + ssub_0)>; class N3VSPat<SDNode OpNode, NeonI Inst> : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), (EXTRACT_SUBREG (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), + SPR:$a, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0))), - arm_ssubreg_0)>; + SPR:$b, ssub_0))), + ssub_0)>; class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, arm_ssubreg_0), + SPR:$acc, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), + SPR:$a, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; + SPR:$b, ssub_0)), + ssub_0)>; // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. 
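(Annotation before the Thumb changes: the SDNodeXForms and patterns above all rely on the same piece of arithmetic, mapping a Q-register lane to the D subregister that contains it. A self-contained C++ sketch of that mapping follows; the enum values are illustrative stand-ins, since only the contiguity that the asserts check actually matters, and dsubForLane is a made-up helper, not an LLVM API.)

#include <cassert>
#include <cstdio>

// Illustrative stand-ins for the TableGen-generated subregister indices;
// the only property the xforms rely on is that dsub_0..dsub_7 are numbered
// contiguously.
enum SubregIdx { dsub_0 = 5, dsub_1, dsub_2, dsub_3,
                 dsub_4, dsub_5, dsub_6, dsub_7 };

// Mirror of DSubReg_i8_reg / DSubReg_i16_reg / DSubReg_i32_reg: a D register
// holds 8 bytes, so lane L of an EltBytes-sized element lives in D subreg
// dsub_0 + L / (8 / EltBytes).
static SubregIdx dsubForLane(unsigned Lane, unsigned EltBytes) {
  unsigned LanesPerD = 8 / EltBytes;
  return SubregIdx(dsub_0 + Lane / LanesPerD);
}

int main() {
  assert(dsub_7 == dsub_0 + 7 && "Unexpected subreg numbering");
  assert(dsubForLane(3, 1) == dsub_0);  // v16i8 lane 3 -> low D register
  assert(dsubForLane(9, 1) == dsub_1);  // v16i8 lane 9 -> high D register
  assert(dsubForLane(5, 2) == dsub_1);  // v8i16 lane 5 -> high D register
  assert(dsubForLane(1, 4) == dsub_0);  // v4i32 lane 1 -> low D register
  std::printf("all lane->subreg checks passed\n");
  return 0;
}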
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e3ca536..40f924b 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -127,12 +127,12 @@ def t_addrmode_sp : Operand<i32>, let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def tADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, - "@ tADJCALLSTACKUP $amt1", + "${:comment} tADJCALLSTACKUP $amt1", [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>; def tADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, - "@ tADJCALLSTACKDOWN $amt", + "${:comment} tADJCALLSTACKDOWN $amt", [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } @@ -254,14 +254,14 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, // Pseudo instruction that will expand into a tSUBspi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), - NoItinerary, "@ sub\t$dst, $rhs", []>; + NoItinerary, "${:comment} sub\t$dst, $rhs", []>; def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - NoItinerary, "@ add\t$dst, $rhs", []>; + NoItinerary, "${:comment} add\t$dst, $rhs", []>; let Defs = [CPSR] in def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - NoItinerary, "@ and\t$dst, $rhs", []>; + NoItinerary, "${:comment} and\t$dst, $rhs", []>; } // usesCustomInserter //===----------------------------------------------------------------------===// @@ -374,7 +374,7 @@ let isBranch = 1, isTerminator = 1 in { // Far jump let Defs = [LR] in def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, - "bl\t$target\t@ far jump",[]>; + "bl\t$target\t${:comment} far jump",[]>; def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), @@ -417,9 +417,13 @@ def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>, } } -// A8.6.16 B: Encoding T1 -- for disassembly only +// A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1110 then UNDEFINED -def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 { +// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to +// binutils +let isBarrier = 1, isTerminator = 1 in +def tTRAP : TI<(outs), (ins), IIC_Br, + ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 { let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1110; } @@ -476,7 +480,7 @@ def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). -let canFoldAsLoad = 1, mayLoad = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -490,7 +494,8 @@ def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 // Special LDR for loads from non-pc-relative constpools. 
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, + isReMaterializable = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -527,7 +532,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, [(store tGPR:$src, t_addrmode_sp:$addr)]>, T1LdStSP<{0,?,?}>; -let mayStore = 1 in { +let mayStore = 1, neverHasSideEffects = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, @@ -540,7 +545,7 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, // // These requires base address to be written back or one of the loaded regs. -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def tLDM : T1I<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, @@ -553,9 +558,9 @@ def tLDM_UPD : T1It<(outs tGPR:$wb), "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects = 1, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def tSTM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, @@ -866,11 +871,12 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), - NoItinerary, "@ tMOVCCr $cc", + NoItinerary, "${:comment} tMOVCCr $cc", [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; // 16-bit movcc in IT blocks for Thumb2. +let neverHasSideEffects = 1 in { def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, "mov", "\t$dst, $rhs", []>, T1Special<{1,0,?,?}>; @@ -878,9 +884,12 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi, "mov", "\t$dst, $rhs", []>, T1General<{1,0,0,?,?}>; +} // neverHasSideEffects // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. 
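// Like the ARM-mode LEApcrel above, this is pure address arithmetic on the
// pc, so it is side-effect free and rematerializable.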
+let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p\t$dst, #$label", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 @@ -889,6 +898,7 @@ def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 +} // neverHasSideEffects //===----------------------------------------------------------------------===// // TLS Instructions @@ -918,16 +928,32 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; } + +// FIXME: Non-Darwin version(s) +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, + Defs = [ R7, LR, SP ] in { +def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "ldr\t$scratch, [$src, #8]\n\t" + "mov\tsp, $scratch\n\t" + "ldr\t$scratch, [$src, #4]\n\t" + "ldr\tr7, [$src]\n\t" + "bx\t$scratch", "", + [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsThumb, IsDarwin]>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -1011,7 +1037,7 @@ def : T1Pat<(i32 imm0_255_comp:$src), // scheduling. let isReMaterializable = 1 in def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb1Only]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 742bd40..b91c089 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -185,8 +185,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - opc, ".w\t$dst, $src", + def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + opc, ".w\t$dst, $src", [(set GPR:$dst, (opnode GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -198,9 +198,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, - opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_reg:$src))]> { + def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, + opc, ".w\t$dst, $src", + [(set GPR:$dst, (opnode t2_so_reg:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -210,7 +210,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, } /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a -// binary operation that produces a value. These are predicable and can be +/// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. 
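/// (The optional CPSR update comes from the T2sI base class, which adds the
/// cc_out:$s operand and the "${s}" asm suffix; the encodings below leave
/// the S bit, Inst{20}, as '?' so both forms are representable.)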
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0, string wide =""> { @@ -259,23 +259,23 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_bin_irs counterpart. multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, - opc, ".w\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + opc, ".w\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. + let Inst{20} = ?; // The S bit. let Inst{15} = 0; } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, - opc, "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + opc, "\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. + let Inst{20} = ?; // The S bit. } } @@ -461,10 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Defs = [CPSR] in { multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s), - IIC_iALUi, - !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -472,10 +471,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { let Inst{15} = 0; } // shifted register - def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s), - IIC_iALUsi, - !strconcat(opc, "${s}\t$dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + !strconcat(opc, "s"), "\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -639,7 +637,8 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set GPR:$dst, (opnode GPR:$src))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -650,7 +649,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, opc, ".w\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -665,7 +665,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs 
GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, "\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set GPR:$dst, (opnode GPR:$src))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -676,7 +677,8 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { } def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -717,7 +719,8 @@ multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", - [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]> { + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -728,7 +731,8 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, - (rotr GPR:$RHS, rot_imm:$rot)))]> { + (rotr GPR:$RHS, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -771,6 +775,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { // LEApcrel - Load a pc-relative address into a register without offending the // assembler. +let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p.w\t$dst, #$label", []> { let Inst{31-27} = 0b11110; @@ -792,6 +798,7 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } +} // neverHasSideEffects // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -856,9 +863,11 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), let Inst{15} = 0; } -// Signed and unsigned division, for disassembly only +// Signed and unsigned division on v7-M def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, - "sdiv", "\t$dst, $a, $b", []> { + "sdiv", "\t$dst, $a, $b", + [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>, + Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; let Inst{20} = 0b1; @@ -867,7 +876,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, } def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, - "udiv", "\t$dst, $a, $b", []> { + "udiv", "\t$dst, $a, $b", + [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>, + Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; let Inst{20} = 0b1; @@ -878,11 +889,11 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, // Pseudo instruction that will expand into a t2SUBrSPi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. 
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>; + NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>; def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - NoItinerary, "@ subw\t$dst, $sp, $imm", []>; + NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>; def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - NoItinerary, "@ sub\t$dst, $sp, $rhs", []>; + NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>; } // usesCustomInserter @@ -902,7 +913,7 @@ defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), (ins t2addrmode_imm8s4:$addr), @@ -912,7 +923,7 @@ def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), "ldrd", "\t$dst1, $addr", []> { let Inst{19-16} = 0b1111; // Rn } -} +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), @@ -955,7 +966,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), (t2LDRHpci tconstpool:$addr)>; // Indexed loads -let mayLoad = 1 in { +let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, @@ -1011,7 +1022,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; -} +} // mayLoad = 1, neverHasSideEffects = 1 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are // for disassembly only. @@ -1041,7 +1052,7 @@ defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword -let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in +let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr), IIC_iStorer, "strd", "\t$src1, $addr", []>; @@ -1204,7 +1215,7 @@ defm t2PLI : T2Ipl<1, 0, "pli">; // Load / store multiple Instructions. // -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> { @@ -1227,9 +1238,9 @@ def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, let Inst{21} = 1; // The W bit. let Inst{20} = 1; // Load } -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> { @@ -1253,7 +1264,7 @@ def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, let Inst{21} = 1; // The W bit. 
let Inst{20} = 0; // Store } -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq //===----------------------------------------------------------------------===// // Move Instructions. @@ -1564,9 +1575,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, } let Defs = [CPSR] in { -def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "lsrs.w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { +def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "lsrs", ".w\t$dst, $src, #1", + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1577,9 +1588,9 @@ def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, let Inst{14-12} = 0b000; let Inst{7-6} = 0b01; } -def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "asrs.w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { +def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "asrs", ".w\t$dst, $src, #1", + [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2058,7 +2069,8 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), - 0xFFFF0000)))]> { + 0xFFFF0000)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2068,15 +2080,18 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHBT where identities eliminate some nodes. def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), - (t2PKHBT GPR:$src1, GPR:$src2, 0)>; + (t2PKHBT GPR:$src1, GPR:$src2, 0)>, + Requires<[HasT2ExtractPack]>; def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; + (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>, + Requires<[HasT2ExtractPack]>; def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]> { + 0xFFFF)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2087,10 +2102,12 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (t2PKHTB GPR:$src1, GPR:$src2, 16)>; + (t2PKHTB GPR:$src1, GPR:$src2, 16)>, + Requires<[HasT2ExtractPack]>; def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>, + Requires<[HasT2ExtractPack]>; //===----------------------------------------------------------------------===// // Comparison Instructions... @@ -2127,6 +2144,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. 
:( +let neverHasSideEffects = 1 in { def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -2178,6 +2196,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -2378,13 +2397,13 @@ let Defs = D31 ] in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; @@ -2394,13 +2413,13 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; @@ -2672,7 +2691,7 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // scheduling. let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 36fcaa1..54474cf 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -76,7 +76,7 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // Load / store multiple Instructions. 
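The recurring "@" -> "${:comment}" rewrites in the asm strings above (tMOVCCr_pseudo, the eh.setjmp blocks, the t2SUBrSP pseudos) swap the hard-coded ARM GAS comment marker for a placeholder that the asm printer expands to the target assembler's comment string, so the same pseudo-asm text works with an assembler that uses a different marker. A toy version of that substitution, not the actual AsmPrinter code; the ';' marker below is purely illustrative:

```cpp
#include <iostream>
#include <string>

// Stand-in for the printer's comment-string lookup; the real code asks
// the target's MCAsmInfo for its comment string.
static std::string expandComment(std::string S, const std::string &Marker) {
  const std::string Token = "${:comment}";
  for (std::string::size_type Pos; (Pos = S.find(Token)) != std::string::npos; )
    S.replace(Pos, Token.size(), Marker);
  return S;
}

int main() {
  std::cout << expandComment("${:comment} tMOVCCr $cc", "@") << '\n'; // ARM GAS
  std::cout << expandComment("${:comment} tMOVCCr $cc", ";") << '\n'; // illustrative
  return 0;
}
```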
// -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { @@ -104,9 +104,9 @@ def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, "$addr.base = $wb", []> { let Inst{20} = 1; } -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { @@ -134,7 +134,7 @@ def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, "$addr.base = $wb", []> { let Inst{20} = 0; } -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores @@ -313,6 +313,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), IIC_fpMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; +let neverHasSideEffects = 1 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", @@ -326,6 +327,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } +} // neverHasSideEffects // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -337,6 +339,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, let Inst{7-6} = 0b00; } +let neverHasSideEffects = 1 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", @@ -606,6 +609,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, // FP Conditional moves. // +let neverHasSideEffects = 1 in { def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$false, DPR:$true), IIC_fpUNA64, "vmov", ".f64\t$dst, $true", @@ -629,7 +633,7 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; - +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Misc. @@ -651,6 +655,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", // FPSCR <-> GPR (for disassembly only) +let neverHasSideEffects = 1 in { let Uses = [FPSCR] in { def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", "\t$dst, fpscr", @@ -674,6 +679,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", let Inst{4} = 1; } } +} // neverHasSideEffects // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index b31a4fa..5f6d7ee 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -318,6 +318,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, *((intptr_t*)RelocPos) |= ResultPtr; break; } + case ARM::reloc_arm_movw: { + ResultPtr = ResultPtr & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } + case ARM::reloc_arm_movt: { + ResultPtr = (ResultPtr >> 16) & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } } } } diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 041afd0..8edfb9a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -23,16 +23,6 @@ namespace llvm { class ARMBaseInstrInfo; class Type; -namespace ARM { - /// SubregIndex - The index of various subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// ARMRegisterInfo.td file. - enum SubregIndex { - SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, - DSUBREG_0 = 5, DSUBREG_1 = 6 - }; -} - struct ARMRegisterInfo : public ARMBaseRegisterInfo { public: ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 0d4200c..6beca8b 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -23,6 +23,44 @@ class ARMFReg<bits<6> num, string n> : Register<n> { let Namespace = "ARM"; } +// Subregister indices. +let Namespace = "ARM" in { +// Note: Code depends on these having consecutive numbers. +def ssub_0 : SubRegIndex; +def ssub_1 : SubRegIndex; +def ssub_2 : SubRegIndex; // In a Q reg. +def ssub_3 : SubRegIndex; +def ssub_4 : SubRegIndex; // In a QQ reg. +def ssub_5 : SubRegIndex; +def ssub_6 : SubRegIndex; +def ssub_7 : SubRegIndex; +def ssub_8 : SubRegIndex; // In a QQQQ reg. 
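Back in the ARMJITInfo::relocate hunk above, the two new cases patch MOVW/MOVT immediates in place: the resolved address is first cut to the relevant 16-bit half (low half for reloc_arm_movw, high half for reloc_arm_movt), and that half is then scattered into the instruction's imm12 field (bits 11:0) and imm4 field (bits 19:16). A standalone check of the same field packing; the instruction word and address below are invented for the test:

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the OR-based patching in the reloc_arm_movw / reloc_arm_movt cases.
static uint32_t packImm16(uint32_t Insn, uint32_t Imm16) {
  Insn |= Imm16 & 0xFFF;               // imm12 -> bits 11:0
  Insn |= ((Imm16 >> 12) & 0xF) << 16; // imm4  -> bits 19:16
  return Insn;
}

int main() {
  const uint32_t Addr = 0x1234ABCD;
  uint32_t MovW = packImm16(0, Addr & 0xFFFF);         // reloc_arm_movw
  uint32_t MovT = packImm16(0, (Addr >> 16) & 0xFFFF); // reloc_arm_movt
  assert((MovW & 0xFFF) == 0xBCD && ((MovW >> 16) & 0xF) == 0xA);
  assert((MovT & 0xFFF) == 0x234 && ((MovT >> 16) & 0xF) == 0x1);
  return 0;
}
```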
+def ssub_9 : SubRegIndex; +def ssub_10 : SubRegIndex; +def ssub_11 : SubRegIndex; +def ssub_12 : SubRegIndex; +def ssub_13 : SubRegIndex; +def ssub_14 : SubRegIndex; +def ssub_15 : SubRegIndex; + +def dsub_0 : SubRegIndex; +def dsub_1 : SubRegIndex; +def dsub_2 : SubRegIndex; +def dsub_3 : SubRegIndex; +def dsub_4 : SubRegIndex; +def dsub_5 : SubRegIndex; +def dsub_6 : SubRegIndex; +def dsub_7 : SubRegIndex; + +def qsub_0 : SubRegIndex; +def qsub_1 : SubRegIndex; +def qsub_2 : SubRegIndex; +def qsub_3 : SubRegIndex; + +def qqsub_0 : SubRegIndex; +def qqsub_1 : SubRegIndex; +} + // Integer registers def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>; def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>; @@ -58,9 +96,9 @@ def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">; def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">; def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">; def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; -def SDummy : ARMFReg<63, "sINVALID">; // Aliases of the F* registers used to hold 64-bit fp values (doubles) +let SubRegIndices = [ssub_0, ssub_1] in { def D0 : ARMReg< 0, "d0", [S0, S1]>; def D1 : ARMReg< 1, "d1", [S2, S3]>; def D2 : ARMReg< 2, "d2", [S4, S5]>; @@ -77,6 +115,7 @@ def D12 : ARMReg<12, "d12", [S24, S25]>; def D13 : ARMReg<13, "d13", [S26, S27]>; def D14 : ARMReg<14, "d14", [S28, S29]>; def D15 : ARMReg<15, "d15", [S30, S31]>; +} // VFP3 defines 16 additional double registers def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">; @@ -89,6 +128,9 @@ def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; // Advanced SIMD (NEON) defines 16 quad-word aliases +let SubRegIndices = [dsub_0, dsub_1], + CompositeIndices = [(ssub_2 dsub_1, ssub_0), + (ssub_3 dsub_1, ssub_1)] in { def Q0 : ARMReg< 0, "q0", [D0, D1]>; def Q1 : ARMReg< 1, "q1", [D2, D3]>; def Q2 : ARMReg< 2, "q2", [D4, D5]>; @@ -97,6 +139,8 @@ def Q4 : ARMReg< 4, "q4", [D8, D9]>; def Q5 : ARMReg< 5, "q5", [D10, D11]>; def Q6 : ARMReg< 6, "q6", [D12, D13]>; def Q7 : ARMReg< 7, "q7", [D14, D15]>; +} +let SubRegIndices = [dsub_0, dsub_1] in { def Q8 : ARMReg< 8, "q8", [D16, D17]>; def Q9 : ARMReg< 9, "q9", [D18, D19]>; def Q10 : ARMReg<10, "q10", [D20, D21]>; @@ -105,6 +149,51 @@ def Q12 : ARMReg<12, "q12", [D24, D25]>; def Q13 : ARMReg<13, "q13", [D26, D27]>; def Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; +} + +// Pseudo 256-bit registers to represent pairs of Q registers. These should +// never be present in the emitted code. +// These are used for NEON load / store instructions, e.g. vld4, vst3. +// NOTE: It's possible to define more QQ registers since technically the +// starting D register number doesn't have to be a multiple of 4, e.g. +// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register +// stuff very messy.
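The CompositeIndices annotations here and in the QQ/QQQQ definitions just below spell out nested lookups: (ssub_2 dsub_1, ssub_0) says that ssub_2 of a Q register is reached as ssub_0 inside its dsub_1. Reading the lists off, the flat index appears to be just (parts per outer register) * outer + inner. A quick check of that arithmetic against the entries in this hunk; the helper names are made up:

```cpp
#include <cassert>

static unsigned sInQ(unsigned d, unsigned s)  { return 2 * d + s; } // Q holds 2 S per D
static unsigned sInQQ(unsigned q, unsigned s) { return 4 * q + s; } // QQ holds 4 S per Q
static unsigned dInQQ(unsigned q, unsigned d) { return 2 * q + d; } // QQ holds 2 D per Q

int main() {
  assert(sInQ(1, 0) == 2);  // (ssub_2 dsub_1, ssub_0)
  assert(sInQ(1, 1) == 3);  // (ssub_3 dsub_1, ssub_1)
  assert(sInQQ(1, 2) == 6); // (ssub_6 qsub_1, ssub_2)
  assert(dInQQ(1, 1) == 3); // (dsub_3 qsub_1, dsub_1)
  return 0;
}
```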
+let SubRegIndices = [qsub_0, qsub_1] in { +let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1), + (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1), + (ssub_6 qsub_1, ssub_2), (ssub_7 qsub_1, ssub_3)] in { +def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>; +def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>; +def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>; +def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>; +} +let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in { +def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>; +def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>; +def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>; +def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>; +} +} + +// Pseudo 512-bit registers to represent four consecutive Q registers. +let SubRegIndices = [qqsub_0, qqsub_1] in { +let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), + (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), + (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3), + (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1), + (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3), + (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5), + (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in { +def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; +def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; +} +let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), + (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), + (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in { +def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>; +def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; +} +} // Current Program Status Register. def CPSR : ARMReg<0, "cpsr">; @@ -270,11 +359,6 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>; -// Dummy f32 regclass to represent impossible subreg indices. -def SPR_INVALID : RegisterClass<"ARM", [f32], 32, [SDummy]> { - let CopyCost = -1; -} - // Scalar double precision floating point / generic 64-bit vector register // class. // ARM requires only word alignment for double. It's more performant if it @@ -284,7 +368,6 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31]> { - let SubRegClassList = [SPR_INVALID, SPR_INVALID]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -332,79 +415,68 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15]> { - let SubRegClassList = [SPR, SPR]; + let SubRegClasses = [(SPR ssub_0, ssub_1)]; } // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7]> { - let SubRegClassList = [SPR_8, SPR_8]; + let SubRegClasses = [(SPR_8 ssub_0, ssub_1)]; } // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> { - let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID, - DPR, DPR]; + let SubRegClasses = [(DPR dsub_0, dsub_1)]; } // Subset of QPR that have 32-bit SPR subregs. 
def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> { - let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2]; + let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_VFP2 dsub_0, dsub_1)]; } // Subset of QPR that have DPR_8 and SPR_8 subregs. def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3]> { - let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8]; + let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_8 dsub_0, dsub_1)]; +} + +// Pseudo 256-bit vector register class to model pairs of Q registers +// (4 consecutive D registers). +def QQPR : RegisterClass<"ARM", [v4i64], + 256, + [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> { + let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), + (QPR qsub_0, qsub_1)]; +} + +// Subset of QQPR that have 32-bit SPR subregs. +def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], + 256, + [QQ0, QQ1, QQ2, QQ3]> { + let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3), + (QPR_VFP2 qsub_0, qsub_1)]; + +} + +// Pseudo 512-bit vector register class to model 4 consecutive Q registers +// (8 consecutive D registers). +def QQQQPR : RegisterClass<"ARM", [v8i64], + 256, + [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> { + let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, + dsub_4, dsub_5, dsub_6, dsub_7), + (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; } // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; -//===----------------------------------------------------------------------===// -// Subregister Set Definitions... now that we have all of the pieces, define the -// sub registers for each register. -// - -def arm_ssubreg_0 : PatLeaf<(i32 1)>; -def arm_ssubreg_1 : PatLeaf<(i32 2)>; -def arm_ssubreg_2 : PatLeaf<(i32 3)>; -def arm_ssubreg_3 : PatLeaf<(i32 4)>; -def arm_dsubreg_0 : PatLeaf<(i32 5)>; -def arm_dsubreg_1 : PatLeaf<(i32 6)>; - -// S sub-registers of D registers. -def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [S0, S2, S4, S6, S8, S10, S12, S14, - S16, S18, S20, S22, S24, S26, S28, S30]>; -def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [S1, S3, S5, S7, S9, S11, S13, S15, - S17, S19, S21, S23, S25, S27, S29, S31]>; - -// S sub-registers of Q registers. -def : SubRegSet<1, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S0, S4, S8, S12, S16, S20, S24, S28]>; -def : SubRegSet<2, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S1, S5, S9, S13, S17, S21, S25, S29]>; -def : SubRegSet<3, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S2, S6, S10, S14, S18, S22, S26, S30]>; -def : SubRegSet<4, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S3, S7, S11, S15, S19, S23, S27, S31]>; - -// D sub-registers of Q registers. -def : SubRegSet<5, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], - [D0, D2, D4, D6, D8, D10, D12, D14, - D16, D18, D20, D22, D24, D26, D28, D30]>; -def : SubRegSet<6, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], - [D1, D3, D5, D7, D9, D11, D13, D15, - D17, D19, D21, D23, D25, D27, D29, D31]>; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h index 2cc2950..86e7206 100644 --- a/lib/Target/ARM/ARMRelocations.h +++ b/lib/Target/ARM/ARMRelocations.h @@ -47,7 +47,13 @@ namespace llvm { reloc_arm_pic_jt, // reloc_arm_branch - Branch address relocation. 
- reloc_arm_branch + reloc_arm_branch, + + // reloc_arm_movt - MOVT immediate relocation. + reloc_arm_movt, + + // reloc_arm_movw - MOVW immediate relocation. + reloc_arm_movw }; } } diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index c04ee38..a289407 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -12,11 +12,123 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-selectiondag-info" -#include "ARMSelectionDAGInfo.h" +#include "ARMTargetMachine.h" using namespace llvm; -ARMSelectionDAGInfo::ARMSelectionDAGInfo() { +ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { } + +SDValue +ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. + while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff, false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index afe9a47..d7d00c2 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -19,9 +19,24 @@ namespace llvm { class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + public: - ARMSelectionDAGInfo(); + explicit ARMSelectionDAGInfo(const TargetMachine &TM); ~ARMSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; }; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b11580a..10fd257 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -39,6 +39,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , IsR9Reserved(ReserveR9) , UseMovt(UseMOVT) , HasFP16(false) + , HasHardwareDivide(false) + , HasT2ExtractPack(false) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. @@ -73,6 +75,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, unsigned SubVer = TT[Idx]; if (SubVer >= '7' && SubVer <= '9') { ARMArchVersion = V7A; + if (Len >= Idx+2 && TT[Idx+1] == 'm') + ARMArchVersion = V7M; } else if (SubVer == '6') { ARMArchVersion = V6; if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 288a19a..8332bba 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4, V4T, V5T, V5TE, V6, V6T2, V7A + V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M }; enum ARMFPEnum { @@ -39,7 +39,7 @@ protected: }; /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE, - /// V6, V6T2, V7A. + /// V6, V6T2, V7A, V7M. ARMArchEnum ARMArchVersion; /// ARMFPUType - Floating Point Unit type. 
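To summarize the EmitTargetCodeForMemcpy hunk above: a constant-size, 4-byte-aligned copy is lowered as batches of up to MAX_LOADS_IN_LDM (6) word loads, a TokenFactor barrier, the matching stores, and finally one halfword and/or one byte access for the 1-3 leftover bytes. The size bookkeeping, checked in isolation with a hypothetical 27-byte copy:

```cpp
#include <cassert>

int main() {
  const unsigned SizeVal = 27;       // hypothetical constant memcpy size
  unsigned NumMemOps = SizeVal >> 2; // word-sized load/store pairs
  unsigned BytesLeft = SizeVal & 3;  // trailing bytes
  assert(NumMemOps == 6 && BytesLeft == 3);

  // Batching: 6 words fit in one load group; 7 or more would take two.
  const unsigned MAX_LOADS_IN_LDM = 6;
  unsigned Batches = (NumMemOps + MAX_LOADS_IN_LDM - 1) / MAX_LOADS_IN_LDM;
  assert(Batches == 1);

  // Trailing bytes: one i16 access, then one i8 access.
  unsigned Accesses = 0;
  for (unsigned Left = BytesLeft; Left; ++Accesses)
    Left -= (Left >= 2) ? 2 : 1;
  assert(Accesses == 2);
  return 0;
}
```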
@@ -74,6 +74,13 @@ protected: /// only so far) bool HasFP16; + /// HasHardwareDivide - True if subtarget supports [su]div + bool HasHardwareDivide; + + /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack + /// instructions. + bool HasT2ExtractPack; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -123,6 +130,8 @@ protected: bool hasNEON() const { return ARMFPUType >= NEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } + bool hasDivide() const { return HasHardwareDivide; } + bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 662e61e..b4a9252 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -62,7 +62,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, @@ -76,7 +77,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 4e205df..a222e57 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -21,6 +21,7 @@ #include "ARMJITInfo.h" #include "ARMSubtarget.h" #include "ARMISelLowering.h" +#include "ARMSelectionDAGInfo.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/OwningPtr.h" @@ -63,6 +64,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMInstrInfo InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; + ARMSelectionDAGInfo TSInfo; public: ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -75,6 +77,10 @@ public: return &TLInfo; } + virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } }; @@ -88,6 +94,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; + ARMSelectionDAGInfo TSInfo; public: ThumbTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -101,6 +108,10 @@ public: return &TLInfo; } + virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + /// returns either Thumb1InstrInfo or Thumb2InstrInfo virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 80a9d2d..d95efdb 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -319,16 +319,16 @@ void 
ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); if (Modifier && strcmp(Modifier, "dregpair") == 0) { - unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, 5);// arm_dsubreg_0 - unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, 6);// arm_dsubreg_1 + unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0); + unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1); O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}'; } else if (Modifier && strcmp(Modifier, "lane") == 0) { unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); unsigned DReg = - TM.getRegisterInfo()->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, - &ARM::DPR_VFP2RegClass); + TM.getRegisterInfo()->getMatchingSuperReg(Reg, + RegNum & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; } else { assert(!MO.getSubReg() && "Subregs should be eliminated!"); @@ -1375,13 +1375,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. // This is a hack that lowers as a two instruction sequence. unsigned DstReg = MI->getOperand(0).getReg(); - unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); - + const MachineOperand &MO = MI->getOperand(1); + MCOperand V1, V2; + if (MO.isImm()) { + unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + V1 = MCOperand::CreateImm(ImmVal & 65535); + V2 = MCOperand::CreateImm(ImmVal >> 16); + } else if (MO.isGlobal()) { + MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO); + const MCSymbolRefExpr *SymRef1 = + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_LO16, OutContext); + const MCSymbolRefExpr *SymRef2 = + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_HI16, OutContext); + V1 = MCOperand::CreateExpr(SymRef1); + V2 = MCOperand::CreateExpr(SymRef2); + } else { + MI->dump(); + llvm_unreachable("cannot handle this operand"); + } + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg - TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm) + TmpInst.addOperand(V1); // lower16(imm) // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); @@ -1395,7 +1414,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.setOpcode(ARM::MOVTi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg - TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16)); // upper16(imm) + TmpInst.addOperand(V2); // upper16(imm) // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index ac6331f..2b94b76 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -195,8 +195,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // FIXME: Breaks e.g. ARM/vmul.ll. 
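The MOVi32imm expansion above is a plain 16-bit split: MOVW (MOVi16) receives the low half and MOVT (MOVTi16) the high half, and when the operand is a global the halves become lower16/upper16 symbol references (VK_ARM_LO16/VK_ARM_HI16) instead of immediates. The immediate path, checked with an arbitrary value:

```cpp
#include <cassert>

int main() {
  const unsigned ImmVal = 0xDEADBEEF;    // illustrative 32-bit immediate
  unsigned Lo16 = ImmVal & 65535;        // operand handed to MOVi16 (movw)
  unsigned Hi16 = ImmVal >> 16;          // operand handed to MOVTi16 (movt)
  assert(Lo16 == 0xBEEF && Hi16 == 0xDEAD);
  assert(((Hi16 << 16) | Lo16) == ImmVal); // movt over movw reassembles the value
  return 0;
}
```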
assert(0); /* - unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 - unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 + unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0); + unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1); O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}';*/ @@ -217,7 +217,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); O << '#' << Op.getImm(); } else { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0) + llvm_unreachable("Unsupported modifier"); assert(Op.isExpr() && "unknown operand kind in printOperand"); O << *Op.getExpr(); } diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h index 383d30d..b81a306 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h @@ -26,7 +26,7 @@ namespace llvm { //class ARMSubtarget; /// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst. -class VISIBILITY_HIDDEN ARMMCInstLower { +class LLVM_LIBRARY_VISIBILITY ARMMCInstLower { MCContext &Ctx; Mangler &Mang; AsmPrinter &Printer; diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 3c0414d..0a4400c 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -118,7 +118,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>(); const TargetMachine &TM = Fn.getTarget(); - if (AFI->isThumbFunction()) + if (AFI->isThumb1OnlyFunction()) return false; TRI = TM.getRegisterInfo(); diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index ef6bf3a..a725898 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -33,7 +33,8 @@ namespace { private: bool FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs); + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const; bool PreAllocNEONRegisters(MachineBasicBlock &MBB); }; @@ -338,24 +339,122 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, return false; } -bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs) { - MachineInstr *RegSeq = 0; +bool +NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const { + MachineOperand &FMO = MI->getOperand(FirstOpnd); + assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = FMO.getReg(); + (void)VirtReg; + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + + unsigned LastSubIdx = 0; + if (FMO.isDef()) { + MachineInstr *RegSeq = 0; + for (unsigned R = 0; R < NumRegs; ++R) { + const MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + // Feeding into a REG_SEQUENCE. 
+ if (!MRI->hasOneNonDBGUse(VirtReg)) + return false; + MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg); + if (!UseMI->isRegSequence()) + return false; + if (RegSeq && RegSeq != UseMI) + return false; + unsigned OpIdx = 1 + (Offset + R * Stride) * 2; + if (UseMI->getOperand(OpIdx).getReg() != VirtReg) + llvm_unreachable("Malformed REG_SEQUENCE instruction!"); + unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm(); + if (LastSubIdx) { + if (LastSubIdx != SubIdx-Stride) + return false; + } else { + // Must start from dsub_0 or qsub_0. + if (SubIdx != (ARM::dsub_0+Offset) && + SubIdx != (ARM::qsub_0+Offset)) + return false; + } + RegSeq = UseMI; + LastSubIdx = SubIdx; + } + + // In the case of vld3, etc., make sure the trailing operand of + // REG_SEQUENCE is an undef. + if (NumRegs == 3) { + unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2; + const MachineOperand &MO = RegSeq->getOperand(OpIdx); + unsigned VirtReg = MO.getReg(); + MachineInstr *DefMI = MRI->getVRegDef(VirtReg); + if (!DefMI || !DefMI->isImplicitDef()) + return false; + } + return true; + } + + unsigned LastSrcReg = 0; + SmallVector<unsigned, 4> SubIds; for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); + const MachineOperand &MO = MI->getOperand(FirstOpnd + R); assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); unsigned VirtReg = MO.getReg(); assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "expected a virtual register"); - if (!MRI->hasOneNonDBGUse(VirtReg)) + // Extracting from a Q or QQ register. + MachineInstr *DefMI = MRI->getVRegDef(VirtReg); + if (!DefMI || !DefMI->isExtractSubreg()) return false; - MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg); - if (UseMI->getOpcode() != TargetOpcode::REG_SEQUENCE) + VirtReg = DefMI->getOperand(1).getReg(); + if (LastSrcReg && LastSrcReg != VirtReg) return false; - if (RegSeq && RegSeq != UseMI) + LastSrcReg = VirtReg; + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + if (RC != ARM::QPRRegisterClass && + RC != ARM::QQPRRegisterClass && + RC != ARM::QQQQPRRegisterClass) return false; - RegSeq = UseMI; + unsigned SubIdx = DefMI->getOperand(2).getImm(); + if (LastSubIdx) { + if (LastSubIdx != SubIdx-Stride) + return false; + } else { + // Must start from dsub_0 or qsub_0. + if (SubIdx != (ARM::dsub_0+Offset) && + SubIdx != (ARM::qsub_0+Offset)) + return false; + } + SubIds.push_back(SubIdx); + LastSubIdx = SubIdx; } + + // FIXME: Updating the uses of EXTRACT_SUBREG from REG_SEQUENCE is + // currently required for correctness. e.g. + // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6 + // %reg1042<def> = EXTRACT_SUBREG %reg1041, 6 + // %reg1043<def> = EXTRACT_SUBREG %reg1041, 5 + // VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>, + // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5 + // respectively. + // We need to change how we model uses of REG_SEQUENCE. + for (unsigned R = 0; R < NumRegs; ++R) { + MachineOperand &MO = MI->getOperand(FirstOpnd + R); + unsigned OldReg = MO.getReg(); + MachineInstr *DefMI = MRI->getVRegDef(OldReg); + assert(DefMI->isExtractSubreg()); + MO.setReg(LastSrcReg); + MO.setSubReg(SubIds[R]); + if (R != 0) + MO.setIsKill(false); + // Delete the EXTRACT_SUBREG if its result is now dead.
+ if (MRI->use_empty(OldReg)) + DefMI->eraseFromParent(); + } + return true; } @@ -368,7 +467,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { unsigned FirstOpnd, NumRegs, Offset, Stride; if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; - if (FormsRegSequence(MI, FirstOpnd, NumRegs)) + if (llvm::ModelWithRegSequence() && + FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; MachineBasicBlock::iterator NextI = llvm::next(MBBI); @@ -390,7 +490,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { if (MO.isUse()) { // Insert a copy from VirtReg. TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg, - ARM::DPRRegisterClass, ARM::DPRRegisterClass); + ARM::DPRRegisterClass, ARM::DPRRegisterClass, + DebugLoc()); if (MO.isKill()) { MachineInstr *CopyMI = prior(MBBI); CopyMI->findRegisterUseOperand(VirtReg)->setIsKill(); @@ -399,7 +500,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { } else if (MO.isDef() && !MO.isDead()) { // Add a copy to VirtReg. TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(), - ARM::DPRRegisterClass, ARM::DPRRegisterClass); + ARM::DPRRegisterClass, ARM::DPRRegisterClass, + DebugLoc()); } } } diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index b10c3f7..fae84d4 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -17,6 +17,7 @@ #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" @@ -36,10 +37,8 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == ARM::GPRRegisterClass) { if (SrcRC == ARM::GPRRegisterClass) { BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); @@ -97,10 +96,8 @@ canFoldMemoryOperand(const MachineInstr *MI, void Thumb1InstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { assert((RC == ARM::tGPRRegisterClass || (TargetRegisterInfo::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) && "Unknown regclass!"); @@ -108,6 +105,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == ARM::tGPRRegisterClass || (TargetRegisterInfo::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -124,10 +124,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void Thumb1InstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { 
assert((RC == ARM::tGPRRegisterClass || (TargetRegisterInfo::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) && "Unknown regclass!"); @@ -135,6 +133,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == ARM::tGPRRegisterClass || (TargetRegisterInfo::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -150,7 +151,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool Thumb1InstrInfo:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -161,9 +163,22 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, AddDefaultPred(MIB); for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - // Add the callee-saved register as live-in. It's killed at the spill. - MBB.addLiveIn(Reg); - MIB.addReg(Reg, RegState::Kill); + bool isKill = true; + + // Add the callee-saved register as live-in unless it's LR and + // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress + // then it's already added to the function and entry block live-in sets. + if (Reg == ARM::LR) { + MachineFunction &MF = *MBB.getParent(); + if (MF.getFrameInfo()->isReturnAddressTaken() && + MF.getRegInfo().isLiveIn(Reg)) + isKill = false; + } + + if (isKill) { + MBB.addLiveIn(Reg); + MIB.addReg(Reg, RegState::Kill); + } } return true; } @@ -171,7 +186,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool Thumb1InstrInfo:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (CSI.empty()) diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 516ddf1..c937296 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -39,25 +39,30 @@ public: bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; bool canFoldMemoryOperand(const MachineInstr 
*MI, const SmallVectorImpl<unsigned> &Ops) const; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index b143bd9..531d5e9 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -40,10 +40,8 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == ARM::GPRRegisterClass) { if (SrcRC == ARM::GPRRegisterClass) { BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); @@ -63,17 +61,18 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, } // Handle SPR, DPR, and QPR copies. - return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC); + return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL); } void Thumb2InstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -87,17 +86,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, return; } - ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC); + ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI); } void Thumb2InstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -110,7 +110,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, return; } - ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC); + ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index a0f89a6..2948770 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -35,17 +35,20 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) 
const; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index ba403e2..3aba363 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -146,16 +146,14 @@ bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n"; if (DestRC != SrcRC) { // Not yet supported! return false; } - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (DestRC == Alpha::GPRCRegisterClass) { BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg) .addReg(SrcReg) @@ -180,7 +178,8 @@ void AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { //cerr << "Trying to store " << getPrettyName(SrcReg) << " to " // << FrameIdx << "\n"; //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg); @@ -208,7 +207,8 @@ void AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { //cerr << "Trying to load " << getPrettyName(DestReg) << " to " // << FrameIdx << "\n"; DebugLoc DL; @@ -399,7 +399,6 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31) .addReg(Alpha::R31) .addReg(Alpha::R31); @@ -430,7 +429,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass); + &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, + DebugLoc()); assert(Ok && "Couldn't assign to global base register!"); Ok = Ok; // Silence warning when assertions are turned off. RegInfo.addLiveIn(Alpha::R29); @@ -457,7 +457,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const { GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass); + &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, + DebugLoc()); assert(Ok && "Couldn't assign to global return address register!"); Ok = Ok; // Silence warning when assertions are turned off. 
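A note on the signature change recurring through the Thumb and Alpha hunks above: copyRegToReg() no longer derives a DebugLoc from the insertion point; the caller supplies one explicitly, and call sites such as getGlobalBaseReg()/getGlobalRetAddr(), which synthesize copies where no neighboring instruction offers a location, pass a default-constructed DebugLoc(). A minimal caller-side sketch, not taken from the patch (copyIntoVirtReg is an illustrative name):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Copy a physical register into a fresh virtual register at the top of the
// entry block. The caller now owns the DebugLoc decision; with no source
// location available, DebugLoc() is passed explicitly.
static unsigned copyIntoVirtReg(MachineFunction &MF, unsigned PhysReg,
                                const TargetRegisterClass *RC) {
  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
  MachineBasicBlock &Entry = MF.front();
  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
  bool Ok = TII->copyRegToReg(Entry, Entry.begin(), VReg, PhysReg,
                              RC, RC, DebugLoc());
  assert(Ok && "Couldn't copy between these register classes!");
  Ok = Ok; // Silence warning when assertions are turned off.
  return VReg;
}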
RegInfo.addLiveIn(Alpha::R26); diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index c3b6044..7d7365b 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -48,16 +48,19 @@ public: MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index d5d5e02..a47a29b 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -836,7 +836,7 @@ class br_fcc<bits<6> opc, string asmstr> !strconcat(asmstr, " $R,$dst"), s_fbr>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { -let Ra = 31 in +let Ra = 31, isBarrier = 1 in def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>; def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp index 0eb7b8f..f1958fe 100644 --- a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "alpha-selectiondag-info" -#include "AlphaSelectionDAGInfo.h" +#include "AlphaTargetMachine.h" using namespace llvm; -AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() { +AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() { diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h index 70889ae..3405cc0 100644 --- a/lib/Target/Alpha/AlphaSelectionDAGInfo.h +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class AlphaTargetMachine; + class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo { public: - AlphaSelectionDAGInfo(); + explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM); ~AlphaSelectionDAGInfo(); }; diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index 5169a01..fc9be03 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -32,7 +32,8 @@ AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), JITInfo(*this), Subtarget(TT, FS), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { setRelocationModel(Reloc::PIC_); } diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 0990f6d..153944e 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -20,6 +20,7 @@ #include "AlphaInstrInfo.h" #include "AlphaJITInfo.h" #include "AlphaISelLowering.h" +#include "AlphaSelectionDAGInfo.h" 
#include "AlphaSubtarget.h" namespace llvm { @@ -33,6 +34,7 @@ class AlphaTargetMachine : public LLVMTargetMachine { AlphaJITInfo JITInfo; AlphaSubtarget Subtarget; AlphaTargetLowering TLInfo; + AlphaSelectionDAGInfo TSInfo; public: AlphaTargetMachine(const Target &T, const std::string &TT, @@ -47,6 +49,9 @@ public: virtual const AlphaTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual AlphaJITInfo* getJITInfo() { return &JITInfo; diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index cf1901b..73924b7 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -138,9 +138,8 @@ bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (inClass(BF::ALLRegClass, DestReg, DestRC) && inClass(BF::ALLRegClass, SrcReg, SrcRC)) { BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg); @@ -196,7 +195,8 @@ BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (inClass(BF::DPRegClass, SrcReg, RC)) { @@ -242,7 +242,8 @@ BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); if (inClass(BF::DPRegClass, DestReg, RC)) { BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg) diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h index ea3429c..c1dcd58 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ b/lib/Target/Blackfin/BlackfinInstrInfo.h @@ -50,13 +50,15 @@ namespace llvm { MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, @@ -67,7 +69,8 @@ namespace llvm { virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 2471688..5cf350a 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -301,9 +301,9 @@ def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr), def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>; def : Pat<(i16 (extloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>; + (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>; def : Pat<(i16 (zextloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>; + (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>; def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), "$dst = b[$ptr + $off] (z);", @@ -313,17 +313,17 @@ def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))), (LOAD32p_imm16_8z P:$ptr, imm:$off)>; def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))), (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off), - bfin_subreg_lo16)>; + lo16)>; def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))), (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off), - bfin_subreg_lo16)>; + lo16)>; def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr), "$dst = b[$ptr] (x);", [(set D:$dst, (sextloadi8 P:$ptr))]>; def : Pat<(i16 (sextloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), bfin_subreg_lo16)>; + (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>; def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), "$dst = b[$ptr + $off] (x);", @@ -331,7 +331,7 @@ def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))), (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off), - bfin_subreg_lo16)>; + lo16)>; // Memory loads without patterns let mayLoad = 1 in { @@ -468,16 +468,16 @@ def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off), def : Pat<(truncstorei16 D:$val, PI:$ptr), (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)), - bfin_subreg_lo16), PI:$ptr)>; + lo16), PI:$ptr)>; def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr), (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)), - bfin_subreg_hi16), PI:$ptr)>; + hi16), PI:$ptr)>; def : Pat<(truncstorei8 D16L:$val, P:$ptr), (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 
(i16 (COPY_TO_REGCLASS D16L:$val, D16L)), - bfin_subreg_lo16), + lo16), P:$ptr)>; //===----------------------------------------------------------------------===// @@ -516,19 +516,19 @@ def : Pat<(sext_inreg D16L:$src, i8), (EXTRACT_SUBREG (MOVEsext8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), D16L:$src, - bfin_subreg_lo16)), - bfin_subreg_lo16)>; + lo16)), + lo16)>; def : Pat<(sext_inreg D:$src, i16), - (MOVEsext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>; + (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>; def : Pat<(and D:$src, 0xffff), - (MOVEzext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>; + (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>; def : Pat<(i32 (anyext D16L:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (i16 (COPY_TO_REGCLASS D16L:$src, D16L)), - bfin_subreg_lo16)>; + lo16)>; // TODO Dreg = Dreg_byte (X/Z) @@ -859,4 +859,4 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)), def : Pat<(BfinCall (i32 texternalsym:$dst)), (CALLa texternalsym:$dst)>; def : Pat<(i16 (trunc D:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>; + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 2512c9b..5153ace 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -111,7 +111,7 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return DisableFramePointerElim(MF) || - MFI->hasCalls() || MFI->hasVarSizedObjects(); + MFI->adjustsStack() || MFI->hasVarSizedObjects(); } bool BlackfinRegisterInfo:: @@ -177,11 +177,11 @@ void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB, // We must split into halves BuildMI(MBB, I, DL, - TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_hi16)) + TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16)) .addImm((value >> 16) & 0xffff) .addReg(Reg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, - TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_lo16)) + TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16)) .addImm(value & 0xffff) .addReg(Reg, RegState::ImplicitKill) .addReg(Reg, RegState::ImplicitDefine); @@ -394,7 +394,7 @@ void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const { } if (!hasFP(MF)) { - assert(!MFI->hasCalls() && + assert(!MFI->adjustsStack() && "FP elimination on a non-leaf function is not supported"); adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize); return; @@ -435,7 +435,7 @@ void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF, assert(FrameSize%4 == 0 && "Misaligned frame size"); if (!hasFP(MF)) { - assert(!MFI->hasCalls() && + assert(!MFI->adjustsStack() && "FP elimination on a non-leaf function is not supported"); adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize); return; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 7cfb120..03c5450 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -24,13 +24,6 @@ namespace llvm { class TargetInstrInfo; class Type; - // Subregister indices, keep in sync with BlackfinRegisterInfo.td - enum BfinSubregIdx { - bfin_subreg_lo16 = 1, - bfin_subreg_hi16 = 2, - bfin_subreg_lo32 = 3 - }; - struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo { BlackfinSubtarget &Subtarget; const TargetInstrInfo &TII; diff --git 
a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td index d396cc8..e1cfae9 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.td +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td @@ -11,8 +11,18 @@ // Declarations that describe the Blackfin register file //===----------------------------------------------------------------------===// -// Registers are identified with 3-bit group and 3-bit ID numbers. +// Subregs are: +// 1: .L +// 2: .H +// 3: .W (32 low bits of 40-bit accu) +let Namespace = "BF" in { +def lo16 : SubRegIndex; +def hi16 : SubRegIndex; +def lo32 : SubRegIndex; +def hi32 : SubRegIndex; +} +// Registers are identified with 3-bit group and 3-bit ID numbers. class BlackfinReg<string n> : Register<n> { field bits<3> Group; field bits<3> Num; @@ -40,6 +50,7 @@ class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> { // Ra 40-bit accumulator registers class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> { let SubRegs = subs; + let SubRegIndices = [hi32, lo32]; let Group = 4; let Num = num; } @@ -54,6 +65,7 @@ multiclass Rss<bits<3> group, bits<3> num, string n> { class Rii<bits<3> group, bits<3> num, string n, list<Register> subs> : BlackfinReg<n> { let SubRegs = subs; + let SubRegIndices = [hi16, lo16]; let Group = group; let Num = num; } @@ -164,7 +176,7 @@ def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>; def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>; def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> { - let SubRegs = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]; + let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]; } def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>; @@ -182,38 +194,6 @@ def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>; def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>; def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>; -// Subregs are: -// 1: .L -// 2: .H -// 3: .W (32 low bits of 40-bit accu) -// Keep in sync with enum in BlackfinRegisterInfo.h -def bfin_subreg_lo16 : PatLeaf<(i32 1)>; -def bfin_subreg_hi16 : PatLeaf<(i32 2)>; -def bfin_subreg_32bit : PatLeaf<(i32 3)>; - -def : SubRegSet<1, - [R0, R1, R2, R3, R4, R5, R6, R7, - P0, P1, P2, P3, P4, P5, SP, FP, - I0, I1, I2, I3, M0, M1, M2, M3, - B0, B1, B2, B3, L0, L1, L2, L3], - [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L, - P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL, - I0L, I1L, I2L, I3L, M0L, M1L, M2L, M3L, - B0L, B1L, B2L, B3L, L0L, L1L, L2L, L3L]>; - -def : SubRegSet<2, - [R0, R1, R2, R3, R4, R5, R6, R7, - P0, P1, P2, P3, P4, P5, SP, FP, - I0, I1, I2, I3, M0, M1, M2, M3, - B0, B1, B2, B3, L0, L1, L2, L3], - [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H, - P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH, - I0H, I1H, I2H, I3H, M0H, M1H, M2H, M3H, - B0H, B1H, B2H, B3H, L0H, L1H, L2H, L3H]>; - -def : SubRegSet<1, [A0, A0W, A1, A1W], [A0L, A0L, A1L, A1L]>; -def : SubRegSet<2, [A0, A0W, A1, A1W], [A0H, A0H, A1H, A1H]>; - // Register classes. 
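The Blackfin register-file rework above replaces the old numeric subregister indices (the bfin_subreg_* PatLeaf defs, mirrored by a hand-maintained C++ enum) with named SubRegIndex definitions that TableGen emits into the BF namespace, so .td patterns and C++ passes share one set of symbols. A sketch of the C++ side, condensed from the loadConstant() hunk earlier; the in-tree version also adds implicit-define/kill operands for the full register, omitted here:

#include "BlackfinRegisterInfo.h" // assumed to bring in the generated BF::* enums
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Write a 32-bit immediate as two 16-bit halves, naming the halves through
// the generated subregister indices instead of the old magic numbers 1 and 2.
static void load32ImmAsHalves(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I, DebugLoc DL,
                              const TargetRegisterInfo &TRI,
                              const TargetInstrInfo &TII,
                              unsigned Reg, int64_t Value) {
  BuildMI(MBB, I, DL, TII.get(BF::LOAD16i), TRI.getSubReg(Reg, BF::hi16))
    .addImm((Value >> 16) & 0xffff);
  BuildMI(MBB, I, DL, TII.get(BF::LOAD16i), TRI.getSubReg(Reg, BF::lo16))
    .addImm(Value & 0xffff);
}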
def D16 : RegisterClass<"BF", [i16], 16, [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L, @@ -260,11 +240,11 @@ def GR16 : RegisterClass<"BF", [i16], 16, L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>; def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { - let SubRegClassList = [D16L, D16H]; + let SubRegClasses = [(D16L lo16), (D16H hi16)]; } def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> { - let SubRegClassList = [P16L, P16H]; + let SubRegClasses = [(P16L lo16), (P16H hi16)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -287,7 +267,7 @@ def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>; def DP : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7, P0, P1, P2, P3, P4, P5, FP, SP]> { - let SubRegClassList = [DP16L, DP16H]; + let SubRegClasses = [(DP16L lo16), (DP16H hi16)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp index f4bb25f..a21f696 100644 --- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp @@ -12,10 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "blackfin-selectiondag-info" -#include "BlackfinSelectionDAGInfo.h" +#include "BlackfinTargetMachine.h" using namespace llvm; -BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() { +BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo( + const BlackfinTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() { diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h index a620330..f1ce348 100644 --- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class BlackfinTargetMachine; + class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo { public: - BlackfinSelectionDAGInfo(); + explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM); ~BlackfinSelectionDAGInfo(); }; diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp index 45d7c35..66a2f68 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -31,6 +31,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, DataLayout("e-p:32:32-i64:32-f64:32-n32"), Subtarget(TT, FS), TLInfo(*this), + TSInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) { } diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index 07e7394..a63aa54 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -20,6 +20,7 @@ #include "BlackfinInstrInfo.h" #include "BlackfinSubtarget.h" #include "BlackfinISelLowering.h" +#include "BlackfinSelectionDAGInfo.h" #include "BlackfinIntrinsicInfo.h" namespace llvm { @@ -28,6 +29,7 @@ namespace llvm { const TargetData DataLayout; BlackfinSubtarget Subtarget; BlackfinTargetLowering TLInfo; + BlackfinSelectionDAGInfo TSInfo; BlackfinInstrInfo InstrInfo; TargetFrameInfo FrameInfo; BlackfinIntrinsicInfo IntrinsicInfo; @@ -46,6 +48,9 @@ namespace llvm { virtual const BlackfinTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const 
BlackfinSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 67f513b..55b8aaa 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -2165,6 +2165,9 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { case CallingConv::X86_FastCall: Out << "__attribute__((fastcall)) "; break; + case CallingConv::X86_ThisCall: + Out << "__attribute__((thiscall)) "; + break; default: break; } @@ -3554,11 +3557,11 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { // External Interface declaration //===----------------------------------------------------------------------===// -bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) { +bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(createGCLoweringPass()); diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index d178e7f..6fed195 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -23,12 +23,11 @@ struct CTargetMachine : public TargetMachine { CTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt index 4783dd5..0e7ad35 100644 --- a/lib/Target/CellSPU/README.txt +++ b/lib/Target/CellSPU/README.txt @@ -10,6 +10,8 @@ Department in The Aerospace Corporation: - Chandler Carruth (LLVM expertise) - Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise) +Some minor fixes added by Kalle Raiskila. 
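The CBackend hunks above (and the CppBackend and MSIL ones later in the patch) retire the WantsWholeFile()/addPassesToEmitWholeFile() pair: module-level backends now override the ordinary addPassesToEmitFile() hook, take a PassManagerBase, and refuse any file type other than textual assembly. The shape of such an override, sketched for a hypothetical backend (MyTextTargetMachine and MyModuleWriterPass are illustrative names, not part of the patch):

// Only the override shape mirrors the patch; the names are made up.
bool MyTextTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                              formatted_raw_ostream &Out,
                                              CodeGenFileType FileType,
                                              CodeGenOpt::Level /*OptLevel*/,
                                              bool /*DisableVerify*/) {
  // A source-to-source backend can only produce "assembly", i.e. text.
  if (FileType != TargetMachine::CGFT_AssemblyFile)
    return true;                       // returning true signals failure
  PM.add(new MyModuleWriterPass(Out)); // a ModulePass writing to the stream
  return false;
}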
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 5e04454..081e8d0 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -485,7 +485,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Set pre-RA register scheduler default to BURR, which produces slightly // better code than the default (could also be TDRR, but TargetLowering.h // needs a mod to support that model): - setSchedulingPreference(SchedulingForRegPressure); + setSchedulingPreference(Sched::RegPressure); } const char * diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 423da3b..4c53c98 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -255,16 +255,14 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // We support cross register class moves for our aliases, such as R3 in any // reg class to any other reg class containing R3. This is required because // we instruction select bitconvert i64 -> f64 as a noop for example, so our // types have no specific meaning. - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (DestRC == SPU::R8CRegisterClass) { BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg); } else if (DestRC == SPU::R16CRegisterClass) { @@ -291,9 +289,10 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, void SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { unsigned opc; bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset()); @@ -325,9 +324,10 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { unsigned opc; bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset()); @@ -467,6 +467,9 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, // If there is only one terminator instruction, process it. 
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (isUncondBranch(LastInst)) { + // Check for jump tables + if (!LastInst->getOperand(0).isMBB()) + return true; TBB = LastInst->getOperand(0).getMBB(); return false; } else if (isCondBranch(LastInst)) { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index 42677fc..6dabd7c 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -60,19 +60,22 @@ namespace llvm { MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; //! Store a register to a stack slot, based on its register class. virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; //! Load a register from a stack slot, based on its register class. virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; //! Return true if the specified load or store can be folded virtual diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 6d1f87d..a7fb14c 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -655,7 +655,7 @@ def SFHvec: def SFHr16: RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), "sfh\t$rT, $rA, $rB", IntegerOp, - [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>; + [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>; def SFHIvec: RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), @@ -670,11 +670,11 @@ def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "sf\t$rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "sf\t$rT, $rA, $rB", IntegerOp, - [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>; + [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; def SFIvec: RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index fdbe10f..d8937ec 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -469,7 +469,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->hasCalls()) { + if (FrameSize > 16 || MFI->adjustsStack()) { FrameSize = -(FrameSize + SPUFrameInfo::minStackSize()); if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. 
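The substitutions of MFI->hasCalls() with MFI->adjustsStack() running through these prologue/epilogue hunks reflect what the targets actually test: whether the stack gets adjusted around call sequences, not merely whether calls exist. A sketch of the kind of leaf-frame predicate this expresses, assuming a target whose "empty" frame size is a constant like SPU's 16-byte scavenger slot (needsFrame is an illustrative name):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

// A frame can be elided only for leaf-like functions: small static stack,
// no call-sequence stack adjustment, no variable-sized objects.
static bool needsFrame(const MachineFunction &MF, uint64_t EmptyFrameSize) {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  return MFI->getStackSize() > EmptyFrameSize || MFI->adjustsStack() ||
         MFI->hasVarSizedObjects();
}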
@@ -569,7 +569,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const && "SPURegisterInfo::emitEpilogue: FrameSize not aligned"); // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->hasCalls()) { + if (FrameSize > 16 || MFI->adjustsStack()) { FrameSize = FrameSize + SPUFrameInfo::minStackSize(); if (isInt<10>(FrameSize + LinkSlotOffset)) { // Reload $lr, adjust $sp by required amount diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp index ca2a4bf..5732fd4 100644 --- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "cellspu-selectiondag-info" -#include "SPUSelectionDAGInfo.h" +#include "SPUTargetMachine.h" using namespace llvm; -SPUSelectionDAGInfo::SPUSelectionDAGInfo() { +SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SPUSelectionDAGInfo::~SPUSelectionDAGInfo() { diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h index 0a6b4c1..39257d9 100644 --- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class SPUTargetMachine; + class SPUSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SPUSelectionDAGInfo(); + explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM); ~SPUSelectionDAGInfo(); }; diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 6500067..480ec3f 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -42,6 +42,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, InstrInfo(*this), FrameInfo(*this), TLInfo(*this), + TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { // For the time being, use static relocations, since there's really no // support for PIC yet. 
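Every *SelectionDAGInfo change in this patch follows one template per target: the DAG-info object gains an explicit constructor taking the owning target machine (forwarded to TargetSelectionDAGInfo), becomes a TSInfo member initialized with *this, and is published through a getSelectionDAGInfo() override. Condensed to its skeleton for a hypothetical target Foo (constructors of the machine itself elided):

class FooTargetMachine; // hypothetical target, for illustration only

class FooSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
  explicit FooSelectionDAGInfo(const FooTargetMachine &TM); // defined below
};

class FooTargetMachine : public LLVMTargetMachine {
  FooSelectionDAGInfo TSInfo; // initialized as TSInfo(*this) in the ctor
public:
  virtual const FooSelectionDAGInfo *getSelectionDAGInfo() const {
    return &TSInfo;
  }
};

// In the .cpp, where FooTargetMachine is complete, the constructor simply
// forwards the machine to the TargetSelectionDAGInfo base.
FooSelectionDAGInfo::FooSelectionDAGInfo(const FooTargetMachine &TM)
  : TargetSelectionDAGInfo(TM) {}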
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 37e7cd2..7e02701 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -17,6 +17,7 @@ #include "SPUSubtarget.h" #include "SPUInstrInfo.h" #include "SPUISelLowering.h" +#include "SPUSelectionDAGInfo.h" #include "SPUFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -34,6 +35,7 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUInstrInfo InstrInfo; SPUFrameInfo FrameInfo; SPUTargetLowering TLInfo; + SPUSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; public: SPUTargetMachine(const Target &T, const std::string &TT, @@ -61,6 +63,10 @@ public: return &TLInfo; } + virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const SPURegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index e739b26..45a0c84 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1038,6 +1038,11 @@ namespace { Out << ");"; nl(Out); } + if (GV->isThreadLocal()) { + printCppName(GV); + Out << "->setThreadLocal(true);"; + nl(Out); + } if (is_inline) { out(); Out << "}"; nl(Out); } @@ -2007,11 +2012,11 @@ char CppWriter::ID = 0; // External Interface declaration //===----------------------------------------------------------------------===// -bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) { +bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index b7aae91..e42166e 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -26,12 +26,11 @@ struct CPPTargetMachine : public TargetMachine { const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp index 04dfb0a..e42e9b3 100644 --- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -155,7 +155,7 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { CPUBitmask |= (1 << MBlazeRegisterInfo:: getRegisterNumbering(RI.getFrameRegister(*MF))); - if (MFI->hasCalls()) + if (MFI->adjustsStack()) CPUBitmask |= (1 << MBlazeRegisterInfo:: getRegisterNumbering(RI.getRARegister())); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 01f3174..4c4d86b 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -107,7 +107,6
@@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { void MBlazeInstrInfo:: insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); BuildMI(MBB, MI, DL, get(MBlaze::NOP)); } @@ -115,8 +114,8 @@ bool MBlazeInstrInfo:: copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg) .addReg(SrcReg).addReg(MBlaze::R0); return true; @@ -125,7 +124,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void MBlazeInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill)) .addImm(0).addFrameIndex(FI); @@ -134,7 +134,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void MBlazeInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg) .addImm(0).addFrameIndex(FI); @@ -210,7 +211,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass); bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20, MBlaze::CPURegsRegisterClass, - MBlaze::CPURegsRegisterClass); + MBlaze::CPURegsRegisterClass, + DebugLoc()); assert(Ok && "Couldn't assign to global base register!"); Ok = Ok; // Silence warning when assertions are turned off. 
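As with the DebugLoc change to copyRegToReg(), storeRegToStackSlot() and loadRegFromStackSlot() now receive the TargetRegisterInfo from the caller instead of each target fetching it on its own. A caller-side sketch spilling a register before a point and reloading it just after (spillAroundPoint is an illustrative name):

#include "llvm/ADT/STLExtras.h" // for llvm::next
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Spill Reg to its frame slot before I and reload it immediately after I.
// Both hooks take the TRI argument introduced by this patch.
static void spillAroundPoint(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             unsigned Reg, int FrameIndex,
                             const TargetRegisterClass *RC) {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
  TII->storeRegToStackSlot(MBB, I, Reg, /*isKill=*/true, FrameIndex, RC, TRI);
  TII->loadRegFromStackSlot(MBB, llvm::next(I), Reg, FrameIndex, RC, TRI);
}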
RegInfo.addLiveIn(MBlaze::R20); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index 4f79f1c..c9fdc88 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -203,16 +203,19 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index e15176e..f15eea9 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -220,7 +220,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const { StackOffset += RegSize; } - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { MBlazeFI->setRAStackOffset(0); MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); @@ -311,8 +311,8 @@ emitPrologue(MachineFunction &MF) const { unsigned StackSize = MFI->getStackSize(); // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->hasCalls()) return; - if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + if (StackSize == 0 && !MFI->adjustsStack()) return; + if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28; int FPOffset = MBlazeFI->getFPStackOffset(); int RAOffset = MBlazeFI->getRAStackOffset(); @@ -323,7 +323,7 @@ emitPrologue(MachineFunction &MF) const { // Save the return address only if the function isnt a leaf one. // swi R15, R1, stack_loc - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI)) .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1); } @@ -366,14 +366,14 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { // Restore the return address only if the function isnt a leaf one. // lwi R15, R1, stack_loc - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15) .addImm(RAOffset).addReg(MBlaze::R1); } // Get the number of bytes from FrameInfo int StackSize = (int) MFI->getStackSize(); - if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28; // adjust stack. 
// addi R1, R1, imm diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index 96a5c98..d0a1e75 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -17,21 +17,15 @@ class MBlazeReg<string n> : Register<n> { let Namespace = "MBlaze"; } -class MBlazeRegWithSubRegs<string n, list<Register> subregs> - : RegisterWithSubRegs<n, subregs> { - field bits<5> Num; - let Namespace = "MBlaze"; -} - // MBlaze CPU Registers class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> { let Num = num; } // MBlaze 32-bit (aliased) FPU Registers -class FPR<bits<5> num, string n, list<Register> subregs> - : MBlazeRegWithSubRegs<n, subregs> { +class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> { let Num = num; + let Aliases = aliases; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp index 105e42a..6a115b2 100644 --- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mblaze-selectiondag-info" -#include "MBlazeSelectionDAGInfo.h" +#include "MBlazeTargetMachine.h" using namespace llvm; -MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() { +MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() { diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h index 11e6879..9f8e2aa 100644 --- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class MBlazeTargetMachine; + class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo { public: - MBlazeSelectionDAGInfo(); + explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM); ~MBlazeSelectionDAGInfo(); }; diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 9eba2b3..4252953 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -39,7 +39,7 @@ MBlazeTargetMachine(const Target &T, const std::string &TT, "f64:32:32-v64:32:32-v128:32:32-n32"), InstrInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), - TLInfo(*this) { + TLInfo(*this), TSInfo(*this) { if (getRelocationModel() == Reloc::Default) { setRelocationModel(Reloc::Static); } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 9bf9898..6a57e58 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -17,6 +17,7 @@ #include "MBlazeSubtarget.h" #include "MBlazeInstrInfo.h" #include "MBlazeISelLowering.h" +#include "MBlazeSelectionDAGInfo.h" #include "MBlazeIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -31,6 +32,7 @@ namespace llvm { MBlazeInstrInfo InstrInfo; TargetFrameInfo FrameInfo; MBlazeTargetLowering TLInfo; + MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; public: MBlazeTargetMachine(const Target &T, const std::string &TT, @@ -54,6 +56,9 @@ namespace llvm { virtual const MBlazeTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const + 
{ return &TSInfo; } + const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 15d16ec..3de173c 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -34,12 +34,11 @@ namespace llvm { MSILTarget(const Target &T, const std::string &TT, const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; @@ -279,6 +278,8 @@ std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) { return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) "; case CallingConv::X86_StdCall: return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) "; + case CallingConv::X86_ThisCall: + return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) "; default: errs() << "CallingConvID = " << CallingConvID << '\n'; llvm_unreachable("Unsupported calling convention"); @@ -1686,11 +1687,11 @@ void MSILWriter::printExternals() { // External Interface declaration //===----------------------------------------------------------------------===// -bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) +bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h index f9620e8..e937696 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h @@ -26,7 +26,7 @@ namespace llvm { /// MSP430MCInstLower - This class is used to lower an MachineInstr /// into an MCInst. -class VISIBILITY_HIDDEN MSP430MCInstLower { +class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower { MCContext &Ctx; Mangler &Mang; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index c3e2bdf7..403400e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -83,7 +83,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setStackPointerRegisterToSaveRestore(MSP430::SPW); setBooleanContents(ZeroOrOneBooleanContent); - setSchedulingPreference(SchedulingForLatency); + setSchedulingPreference(Sched::Latency); // We have post-incremented loads / stores. 
setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); @@ -897,6 +897,9 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -920,6 +923,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 2b09b3d..18226ab 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -32,7 +32,8 @@ MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm) void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); @@ -59,7 +60,8 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const{ + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const{ DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); @@ -85,10 +87,8 @@ bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == SrcRC) { unsigned Opc; if (DestRC == &MSP430::GR16RegClass) { @@ -130,7 +130,8 @@ MSP430InstrInfo::isMoveInstr(const MachineInstr& MI, bool MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -154,7 +155,8 @@ MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 6ef4b0a..842b4cb 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -52,7 +52,8 @@ public: bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, 
@@ -62,18 +63,22 @@ public: MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; unsigned GetInstSizeInBytes(const MachineInstr *MI) const; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td index 4078626..f8aec66 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.td +++ b/lib/Target/MSP430/MSP430RegisterInfo.td @@ -43,6 +43,9 @@ def R13B : MSP430Reg<13, "r13">; def R14B : MSP430Reg<14, "r14">; def R15B : MSP430Reg<15, "r15">; +def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; } + +let SubRegIndices = [subreg_8bit] in { def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>; def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>; def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>; @@ -59,13 +62,7 @@ def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>; def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>; def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>; def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>; - -def : SubRegSet<1, [PCW, SPW, SRW, CGW, FPW, - R5W, R6W, R7W, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], - [PCB, SPB, SRB, CGB, FPB, - R5B, R6B, R7B, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def subreg_8bit : PatLeaf<(i32 1)>; +} def GR8 : RegisterClass<"MSP430", [i8], 8, // Volatile registers @@ -101,7 +98,7 @@ def GR16 : RegisterClass<"MSP430", [i16], 16, // Volatile, but not allocable PCW, SPW, SRW, CGW]> { - let SubRegClassList = [GR8]; + let SubRegClasses = [(GR8 subreg_8bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp index a54c929..24f45fa 100644 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "msp430-selectiondag-info" -#include "MSP430SelectionDAGInfo.h" +#include "MSP430TargetMachine.h" using namespace llvm; -MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() { +MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h index c952ab7..fa81948 100644 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class MSP430TargetMachine; + class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo { public: - MSP430SelectionDAGInfo(); + explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM); 
~MSP430SelectionDAGInfo(); }; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index a0dbac2..99877c8 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -33,7 +33,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, Subtarget(TT, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index 68bde9a..b93edfd 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -17,6 +17,7 @@ #include "MSP430InstrInfo.h" #include "MSP430ISelLowering.h" +#include "MSP430SelectionDAGInfo.h" #include "MSP430RegisterInfo.h" #include "MSP430Subtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ class MSP430TargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; + MSP430SelectionDAGInfo TSInfo; // MSP430 does not have any call stack frame, therefore not having // any MSP430 specific FrameInfo class. @@ -54,6 +56,10 @@ public: return &TLInfo; } + virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; // MSP430TargetMachine. diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index d269153..4d7fe4c 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -145,7 +145,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { CPUBitmask |= (1 << MipsRegisterInfo:: getRegisterNumbering(RI.getFrameRegister(*MF))); - if (MFI->hasCalls()) + if (MFI->adjustsStack()) CPUBitmask |= (1 << MipsRegisterInfo:: getRegisterNumbering(RI.getRARegister())); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index ee85a3f3..3888bbf 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -225,12 +225,12 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { MVT::Other, Offset0, Base, Chain); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(LD0, 0)); SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, MVT::Other, Offset1, Base, SDValue(LD0, 1)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(LD1, 0)); ReplaceUses(SDValue(N, 0), I1); @@ -266,9 +266,9 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { DebugLoc dl = N->getDebugLoc(); // Get the even and odd part from the f64 register - SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD, + SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, dl, MVT::f32, N1); - SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPEVEN, + SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven, dl, 
MVT::f32, N1); // The second store should start 4 bytes after the first. @@ -438,9 +438,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue Undef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(MTC, 0)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(MTC, 0)); ReplaceUses(SDValue(Node, 0), I1); return I1.getNode(); diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index dbd3c24..4005e35 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -124,7 +124,6 @@ void MipsInstrInfo:: insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); BuildMI(MBB, MI, DL, get(Mips::NOP)); } @@ -132,10 +131,8 @@ bool MipsInstrInfo:: copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - - if (I != MBB.end()) DL = I->getDebugLoc(); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC != SrcRC) { @@ -190,7 +187,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void MipsInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -223,7 +221,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void MipsInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -624,7 +623,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass); bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP, Mips::CPURegsRegisterClass, - Mips::CPURegsRegisterClass); + Mips::CPURegsRegisterClass, + DebugLoc()); assert(Ok && "Couldn't assign to global base register!"); Ok = Ok; // Silence warning when assertions are turned off.
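A note on the recurring signature change above: copyRegToReg no longer synthesizes a DebugLoc from the insertion point inside the callee; each caller now passes one explicitly, using DebugLoc() when no source location applies, as getGlobalBaseReg does here. A minimal sketch of the updated calling convention, with placeholder operand names:

    // Callers sitting at a concrete insertion point propagate its location;
    // a default-constructed DebugLoc() means "no source location".
    DebugLoc DL;
    if (I != MBB.end()) DL = I->getDebugLoc();
    TII->copyRegToReg(MBB, I, DestReg, SrcReg, RC, RC, DL);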
RegInfo.addLiveIn(Mips::GP); diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index ab8dc59..7919d9a 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -209,16 +209,19 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 478da84..5e719af 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -288,7 +288,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const // Stack locations for FP and RA. If only one of them is used, // the space must be allocated for both, otherwise no space at all. - if (hasFP(MF) || MFI->hasCalls()) { + if (hasFP(MF) || MFI->adjustsStack()) { // FP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); @@ -302,7 +302,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const MipsFI->setRAStackOffset(StackOffset); StackOffset += RegSize; - if (MFI->hasCalls()) + if (MFI->adjustsStack()) TopCPUSavedRegOff += RegSize; } @@ -407,7 +407,7 @@ emitPrologue(MachineFunction &MF) const unsigned StackSize = MFI->getStackSize(); // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->hasCalls()) return; + if (StackSize == 0 && !MFI->adjustsStack()) return; int FPOffset = MipsFI->getFPStackOffset(); int RAOffset = MipsFI->getRAStackOffset(); @@ -425,7 +425,7 @@ emitPrologue(MachineFunction &MF) const // Save the return address only if the function isn't a leaf one. // sw $ra, stack_loc($sp) - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP); } @@ -477,7 +477,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const // Restore the return address only if the function isn't a leaf one. // lw $ra, stack_loc($sp) - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA) .addImm(RAOffset).addReg(Mips::SP); } diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 9fd044c..bc857b8 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -23,15 +23,6 @@ class MipsSubtarget; class TargetInstrInfo; class Type; -namespace Mips { - /// SubregIndex - The index of various sized subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// MipsRegisterInfo.td file.
- enum SubregIndex { - SUBREG_FPEVEN = 1, SUBREG_FPODD = 2 - }; -} - struct MipsRegisterInfo : public MipsGenRegisterInfo { const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 00e7723..be78a22 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -34,9 +34,14 @@ class FPR<bits<5> num, string n> : MipsReg<n> { } // Mips 64-bit (aliased) FPU Registers -class AFPR<bits<5> num, string n, list<Register> subregs> +let Namespace = "Mips" in { +def sub_fpeven : SubRegIndex; +def sub_fpodd : SubRegIndex; +} +class AFPR<bits<5> num, string n, list<Register> subregs> : MipsRegWithSubRegs<n, subregs> { let Num = num; + let SubRegIndices = [sub_fpeven, sub_fpodd]; } //===----------------------------------------------------------------------===// @@ -141,23 +146,6 @@ let Namespace = "Mips" in { } //===----------------------------------------------------------------------===// -// Subregister Set Definitions -//===----------------------------------------------------------------------===// - -def mips_subreg_fpeven : PatLeaf<(i32 1)>; -def mips_subreg_fpodd : PatLeaf<(i32 2)>; - -def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [F0, F2, F4, F6, F8, F10, F12, F14, - F16, F18, F20, F22, F24, F26, F28, F30]>; - -def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [F1, F3, F5, F7, F9, F11, F13, F15, - F17, F19, F21, F23, F25, F27, F29, F31]>; - -//===----------------------------------------------------------------------===// // Register Classes //===----------------------------------------------------------------------===// @@ -255,7 +243,7 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, // Reserved D15]> { - let SubRegClassList = [FGR32, FGR32]; + let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp index 72c149d..e4d70fc 100644 --- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp +++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-selectiondag-info" -#include "MipsSelectionDAGInfo.h" +#include "MipsTargetMachine.h" using namespace llvm; -MipsSelectionDAGInfo::MipsSelectionDAGInfo() { +MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h index 6eaf0c9..6cafb55 100644 --- a/lib/Target/Mips/MipsSelectionDAGInfo.h +++ b/lib/Target/Mips/MipsSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class MipsTargetMachine; + class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { public: - MipsSelectionDAGInfo(); + explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM); ~MipsSelectionDAGInfo(); }; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 4724ff7..ad3eb9e 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -42,7 +42,7 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), InstrInfo(*this), 
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), - TLInfo(*this) { + TLInfo(*this), TSInfo(*this) { // Abicall enables PIC by default if (getRelocationModel() == Reloc::Default) { if (Subtarget.isABI_O32()) diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index cd671cf..d63976f 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -17,6 +17,7 @@ #include "MipsSubtarget.h" #include "MipsInstrInfo.h" #include "MipsISelLowering.h" +#include "MipsSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" @@ -30,6 +31,7 @@ namespace llvm { MipsInstrInfo InstrInfo; TargetFrameInfo FrameInfo; MipsTargetLowering TLInfo; + MipsSelectionDAGInfo TSInfo; public: MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool isLittle); @@ -51,6 +53,10 @@ namespace llvm { return &TLInfo; } + virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index a424c27..aa2e1f4 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -29,7 +29,7 @@ #include <string> namespace llvm { - class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { + class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter { public: explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer); private: diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 5d86329..6a4d0d6 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -70,7 +70,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, // We also need to encode the information about the base type of // pointer in TypeNo. - DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom(); + DIType BaseType = DIDerivedType(Ty).getTypeDerivedFrom(); PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); } @@ -79,7 +79,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TagName) { - DICompositeType CTy = DICompositeType(Ty.getNode()); + DICompositeType CTy = DICompositeType(Ty); DIArray Elements = CTy.getTypeArray(); unsigned short size = 1; unsigned short Dimension[4]={0,0,0,0}; @@ -88,7 +88,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, if (Element.getTag() == dwarf::DW_TAG_subrange_type) { TypeNo = TypeNo << PIC16Dbg::S_DERIVED; TypeNo = TypeNo | PIC16Dbg::DT_ARY; - DISubrange SubRange = DISubrange(Element.getNode()); + DISubrange SubRange = DISubrange(Element); Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; // Each dimension is represented by 2 bytes starting at byte 9. 
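To make the encoding above concrete before the store that follows (an illustration of this hunk only; anything beyond what the comment states is an assumption): reading the comment's 1-based "byte 9" as Aux[8], a hypothetical int a[3][4] yields two subrange elements, so Dimension = {3, 4, 0, 0} and the loop's visible store produces:

    // i == 0:  Aux[8]  = 3;   // first dimension, at the comment's "byte 9"
    // i == 1:  Aux[10] = 4;   // second dimension, two bytes later
    // (the interleaved odd slots presumably hold the paired high bytes)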
Aux[8+i*2+0] = Dimension[i]; @@ -111,7 +111,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TagName) { - DICompositeType CTy = DICompositeType(Ty.getNode()); + DICompositeType CTy = DICompositeType(Ty); TypeNo = TypeNo << PIC16Dbg::S_BASIC; if (Ty.getTag() == dwarf::DW_TAG_structure_type) TypeNo = TypeNo | PIC16Dbg::T_STRUCT; @@ -124,7 +124,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, // llvm.dbg.composite* global variable. Since we need to revisit // PIC16DebugInfo implementation anyway after the MDNodes based // framework is done, let us continue with the way it is. - std::string UniqueSuffix = "." + Ty.getNode()->getNameStr().substr(18); + std::string UniqueSuffix = "." + Ty->getNameStr().substr(18); TagName += UniqueSuffix; unsigned short size = CTy.getSizeInBits()/8; // 7th and 8th byte represent size. @@ -303,7 +303,7 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, bool HasAux = false; int ElementAux[PIC16Dbg::AuxSize] = { 0 }; std::string TagName = ""; - DIDerivedType DITy(Element.getNode()); + DIDerivedType DITy(Element); unsigned short ElementSize = DITy.getSizeInBits()/8; // Get mangled name for this structure/union element. std::string MangMemName = DITy.getName().str() + SuffixNo; @@ -336,7 +336,7 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { CTy.getTag() == dwarf::DW_TAG_structure_type ) { // Get the number after llvm.dbg.composite and make UniqueSuffix from // it. - std::string DIVar = CTy.getNode()->getNameStr(); + std::string DIVar = CTy->getNameStr(); std::string UniqueSuffix = "." + DIVar.substr(18); std::string MangledCTyName = CTy.getName().str() + UniqueSuffix; unsigned short size = CTy.getSizeInBits()/8; diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index f1fcec5..ecaddd3 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -26,7 +26,7 @@ using namespace llvm; namespace { -class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel { +class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel { /// TM - Keep a reference to PIC16TargetMachine.
const PIC16TargetMachine &TM; diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 9e415e0..793dd9f 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -70,7 +70,8 @@ unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -112,7 +113,8 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -153,9 +155,8 @@ bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == PIC16::FSR16RegisterClass) { BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg); diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h index 56f51f0..40a4cb4 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.h +++ b/lib/Target/PIC16/PIC16InstrInfo.h @@ -49,17 +49,20 @@ public: virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h index 9039ca7..5b33b51 100644 --- a/lib/Target/PIC16/PIC16Section.h +++ b/lib/Target/PIC16/PIC16Section.h @@ -44,7 +44,8 @@ namespace llvm { unsigned Size; PIC16Section(StringRef name, SectionKind K, StringRef addr, int color) - : MCSection(K), Name(name), Address(addr), Color(color), Size(0) { + : MCSection(SV_PIC16, K), Name(name), Address(addr), + Color(color), Size(0) { } public: @@ -86,6 +87,11 @@ namespace llvm { /// to a section. 
virtual void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS) const; + + static bool classof(const MCSection *S) { + return S->getVariant() == SV_PIC16; + } + static bool classof(const PIC16Section *) { return true; } }; } // end namespace llvm diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp index 76c6c60..995955a 100644 --- a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pic16-selectiondag-info" -#include "PIC16SelectionDAGInfo.h" +#include "PIC16TargetMachine.h" using namespace llvm; -PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() { +PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() { diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h index 112480e5..c67fd8b 100644 --- a/lib/Target/PIC16/PIC16SelectionDAGInfo.h +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class PIC16TargetMachine; + class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo { public: - PIC16SelectionDAGInfo(); + explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM); ~PIC16SelectionDAGInfo(); }; diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp index e2acb85..82b69be 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.cpp +++ b/lib/Target/PIC16/PIC16TargetMachine.cpp @@ -35,7 +35,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, Trad), DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { } diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h index 849845a..dae5d31 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.h +++ b/lib/Target/PIC16/PIC16TargetMachine.h @@ -17,6 +17,7 @@ #include "PIC16InstrInfo.h" #include "PIC16ISelLowering.h" +#include "PIC16SelectionDAGInfo.h" #include "PIC16RegisterInfo.h" #include "PIC16Subtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ class PIC16TargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment PIC16InstrInfo InstrInfo; PIC16TargetLowering TLInfo; + PIC16SelectionDAGInfo TSInfo; // PIC16 does not have any call stack frame, therefore not having // any PIC16 specific FrameInfo class. 
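Stepping back from the per-target hunks: each target in this patch (MSP430 and Mips above, PIC16 here, and PowerPC, Sparc, and SystemZ below) receives the same three-part change, namely a TSInfo member constructed from the target machine, the matching header include, and a getSelectionDAGInfo() override. A condensed sketch of the shape, using a hypothetical Foo target (all Foo names are placeholders):

    class FooTargetMachine;

    class FooSelectionDAGInfo : public TargetSelectionDAGInfo {
    public:
      explicit FooSelectionDAGInfo(const FooTargetMachine &TM); // TM now required
      ~FooSelectionDAGInfo();
    };

    class FooTargetMachine : public LLVMTargetMachine {
      FooSelectionDAGInfo TSInfo; // initialized as TSInfo(*this)
    public:
      virtual const FooSelectionDAGInfo *getSelectionDAGInfo() const {
        return &TSInfo;
      }
    };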
@@ -54,6 +56,10 @@ public: return &TLInfo; } + virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3d9f8aa..00eebb8 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -712,8 +712,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1) IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); - else - IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, CCReg), 0); + else + IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, + CR7Reg, CCReg), 0); SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; @@ -848,7 +849,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, N->getOperand(0), InFlag); else - return CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, InFlag); + return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, + N->getOperand(0), InFlag); } case ISD::SDIV: { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6f11953..10b516a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5496,12 +5496,15 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + DebugLoc dl = Op.getDebugLoc(); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); // Make sure the function does not optimize away the store of the RA to // the stack. - MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setLRStoreRequired(); bool isPPC64 = PPCSubTarget.isPPC64(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 1d05f3d..6dcaf1e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -111,9 +111,10 @@ namespace llvm { /// Return with a flag operand, matched by 'blr' RET_FLAG, - /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions. - /// This copies the bits corresponding to the specified CRREG into the - /// resultant GPR. Bits corresponding to other CR regs are undefined. + /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF + /// instructions. This copies the bits corresponding to the specified + /// CRREG into the resultant GPR. Bits corresponding to other CR regs + /// are undefined. 
MFCR, /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index ae1fbd8..1b7a778 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -203,8 +203,6 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - BuildMI(MBB, MI, DL, get(PPC::NOP)); } @@ -347,15 +345,13 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC != SrcRC) { // Not yet supported! return false; } - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (DestRC == PPC::GPRCRegisterClass) { BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); } else if (DestRC == PPC::G8RCRegisterClass) { @@ -446,7 +442,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // issue an MFCR to save all of the CRBits. unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? PPC::R2 : PPC::R0; - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg) + .addReg(SrcReg, getKillRegState(isKill))); // If the saved register wasn't CR0, shift the bits left so that they are // in CR0's slot. @@ -520,7 +517,8 @@ void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; @@ -635,7 +633,8 @@ void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 9fb6e7d..7a9e11b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -114,17 +114,20 @@ public: MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 532a3ec..63b4581 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -662,7 +662,7 @@ def STWCX : XForm_1<31, 150,
(outs), (ins GPRC:$rS, memrr:$dst), [(PPCstcx GPRC:$rS, xoaddr:$dst)]>, isDOT; -let isBarrier = 1, hasCtrlDep = 1 in +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; //===----------------------------------------------------------------------===// @@ -862,7 +862,6 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), [(store F8RC:$frS, xaddr:$dst)]>; } -let isBarrier = 1 in def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; @@ -1118,14 +1117,17 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; -// FIXME: this Uses all the CR registers. Marking it as such is -// necessary for DeadMachineInstructionElim to do the right thing. -// However, marking it also exposes PR 2964, and causes crashes in -// the Local RA because it doesn't like this sequence: + +// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters; +// declaring that here gives the local register allocator problems with this: // vreg = MCRF CR0 // MFCR <kill of whatever preg got assigned to vreg> -// For now DeadMachineInstructionElim is turned off, so don't do the marking. -def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), "mfcr $rT", SprMFCR>, +// while not declaring it breaks DeadMachineInstructionElimination. +// As it turns out, in all cases where we currently use this, +// we're only interested in one subregister of it. Represent this in the +// instruction to keep the register allocator from becoming confused. +def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), + "mfcr $rT ${:comment} $FXM", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "mfcr $rT, $FXM", SprMFCR>, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 5f1e04e..0ff852c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -689,19 +689,15 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; unsigned Reg = findScratchRegister(II, RS, RC, SPAdj); + unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue - // an MFCR to save all of the CRBits. Add an implicit kill of the CR. - if (!MI.getOperand(0).isKill()) - BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg); - else - // Implicitly kill the CR register. - BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg) - .addReg(MI.getOperand(0).getReg(), RegState::ImplicitKill); + // an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg. + BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. - unsigned SrcReg = MI.getOperand(0).getReg(); if (SrcReg != PPC::CR0) // rlwinm rA, rA, ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) @@ -1009,7 +1005,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls() && // No calls.
+ !MFI->adjustsStack() && // No calls. (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame MFI->setStackSize(0); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 1cb7340..8604f54 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -10,6 +10,15 @@ // //===----------------------------------------------------------------------===// +let Namespace = "PPC" in { +def sub_lt : SubRegIndex; +def sub_gt : SubRegIndex; +def sub_eq : SubRegIndex; +def sub_un : SubRegIndex; +def sub_32 : SubRegIndex; +} + + class PPCReg<string n> : Register<n> { let Namespace = "PPC"; } @@ -25,6 +34,7 @@ class GPR<bits<5> num, string n> : PPCReg<n> { class GP8<GPR SubReg, string n> : PPCReg<n> { field bits<5> Num = SubReg.Num; let SubRegs = [SubReg]; + let SubRegIndices = [sub_32]; } // SPR - One of the 32-bit special-purpose registers @@ -225,6 +235,7 @@ def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>; def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>; // Condition registers +let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in { def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>; def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>; def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>; @@ -233,15 +244,7 @@ def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>; def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>; def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>; def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>; - -def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], - [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>; -def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], - [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>; -def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], - [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>; -def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], - [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>; +} // Link register def LR : SPR<8, "lr">, DwarfRegNum<[65]>; @@ -372,7 +375,7 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32, def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, CR3, CR4]> { - let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC]; + let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)]; } def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index c0004a9..d4258b4 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "powerpc-selectiondag-info" -#include "PPCSelectionDAGInfo.h" +#include "PPCTargetMachine.h" using namespace llvm; -PPCSelectionDAGInfo::PPCSelectionDAGInfo() { +PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } PPCSelectionDAGInfo::~PPCSelectionDAGInfo() { diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h index 3ad3418..341b69c 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class PPCTargetMachine; + class PPCSelectionDAGInfo : public 
TargetSelectionDAGInfo { public: - PPCSelectionDAGInfo(); + explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM); ~PPCSelectionDAGInfo(); }; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index c4a7408..10cd10b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -44,7 +44,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), - FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this), + FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), + TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { if (getRelocationModel() == Reloc::Default) { diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 35e33a2..626ddbb 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -19,6 +19,7 @@ #include "PPCJITInfo.h" #include "PPCInstrInfo.h" #include "PPCISelLowering.h" +#include "PPCSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -35,6 +36,7 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCFrameInfo FrameInfo; PPCJITInfo JITInfo; PPCTargetLowering TLInfo; + PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; public: @@ -47,6 +49,9 @@ public: virtual const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 144bf5d..7fa73ed 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -1833,6 +1833,21 @@ entry: We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates. See GCC PR34949 +Another interesting case is that something related could be used for variables +that go const after their ctor has finished. In these cases, globalopt (which +can statically run the constructor) could mark the global const (so it gets put +in the readonly section). A testcase would be: + +#include <complex> +using namespace std; +const complex<char> should_be_in_rodata (42,-42); +complex<char> should_be_in_data (42,-42); +complex<char> should_be_in_bss; + +Currently we evaluate the ctors, but the globals don't become const because +the optimizer doesn't know they "become const" after the ctor is done. See +GCC PR4131 for more examples. + //===---------------------------------------------------------------------===// In this code: diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index e494d7d..8e49eca 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -122,15 +122,13 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC != SrcRC) { // Not yet supported!
return false; } - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - if (DestRC == SP::IntRegsRegisterClass) BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg); else if (DestRC == SP::FPRegsRegisterClass) @@ -148,7 +146,8 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB, void SparcInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -169,7 +168,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void SparcInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index 345674b..a00ba39 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -74,17 +74,20 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index 2b05c19..fede929 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -20,6 +20,11 @@ class SparcCtrlReg<string n>: Register<n> { let Namespace = "SP"; } +let Namespace = "SP" in { +def sub_even : SubRegIndex; +def sub_odd : SubRegIndex; +} + // Registers are identified with 5-bit ID numbers. 
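Sparc here repeats the patch-wide move from numeric subregister indices to named SubRegIndex records in the target's namespace; on the C++ side, consumers trade magic constants for the generated enumerators. A consumer-side sketch (SP::sub_even is defined just above; Reg and TRI are placeholder names):

    // Before: the bare index 1 had meaning only by convention.
    //   unsigned EvenReg = TRI->getSubReg(Reg, 1);
    // After: a named, TableGen-generated index.
    unsigned EvenReg = TRI->getSubReg(Reg, SP::sub_even);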
// Ri - 32-bit integer registers class Ri<bits<5> num, string n> : SparcReg<n> { @@ -33,6 +38,7 @@ class Rf<bits<5> num, string n> : SparcReg<n> { class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> { let Num = num; let SubRegs = subregs; + let SubRegIndices = [sub_even, sub_odd]; } // Control Registers diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp index 4825aa9..190c575 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sparc-selectiondag-info" -#include "SparcSelectionDAGInfo.h" +#include "SparcTargetMachine.h" using namespace llvm; -SparcSelectionDAGInfo::SparcSelectionDAGInfo() { +SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h index bc1b561..dcd4203 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class SparcTargetMachine; + class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SparcSelectionDAGInfo(); + explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM); ~SparcSelectionDAGInfo(); }; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index a676623..b58d6ba 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -34,7 +34,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64bit), DataLayout(Subtarget.getDataLayout()), - TLInfo(*this), InstrInfo(Subtarget), + TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 1367a31..322c82a 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -20,6 +20,7 @@ #include "SparcInstrInfo.h" #include "SparcSubtarget.h" #include "SparcISelLowering.h" +#include "SparcSelectionDAGInfo.h" namespace llvm { @@ -27,6 +28,7 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcSubtarget Subtarget; const TargetData DataLayout; // Calculates type size & alignment SparcTargetLowering TLInfo; + SparcSelectionDAGInfo TSInfo; SparcInstrInfo InstrInfo; TargetFrameInfo FrameInfo; public: @@ -42,6 +44,9 @@ public: virtual const SparcTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } // Pass Pipeline Configuration diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 2094cc9..b35190a 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -359,29 +359,25 @@ void SubtargetFeatures::dump() const { print(dbgs()); } -/// getDefaultSubtargetFeatures - Return a string listing -/// the features associated with the target triple. +/// getDefaultSubtargetFeatures - Set the CPU and the default features +/// associated with the target triple.
/// /// FIXME: This is an inelegant way of specifying the features of a /// subtarget. It would be better if we could encode this information /// into the IR. See <rdar://5972456>. /// -std::string SubtargetFeatures::getDefaultSubtargetFeatures( - const Triple& Triple) { - switch (Triple.getVendor()) { - case Triple::Apple: - switch (Triple.getArch()) { - case Triple::ppc: // powerpc-apple-* - return std::string("altivec"); - case Triple::ppc64: // powerpc64-apple-* - return std::string("64bit,altivec"); - default: - break; +void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU, + const Triple& Triple) { + setCPU(CPU); + + if (Triple.getVendor() == Triple::Apple) { + if (Triple.getArch() == Triple::ppc) { + // powerpc-apple-* + AddFeature("altivec"); + } else if (Triple.getArch() == Triple::ppc64) { + // powerpc64-apple-* + AddFeature("64bit"); + AddFeature("altivec"); } - break; - default: - break; - } - - return std::string(""); + } } diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index 07cfb2c..90be222 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -124,9 +124,9 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { if (strncmp(Modifier + 7, "even", 4) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_EVEN); + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32); else if (strncmp(Modifier + 7, "odd", 3) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_ODD); + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32); else assert(0 && "Invalid subreg modifier"); } diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 75d563b..bb2952a 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -30,11 +30,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -static const unsigned subreg_even32 = 1; -static const unsigned subreg_odd32 = 2; -static const unsigned subreg_even = 3; -static const unsigned subreg_odd = 4; - namespace { /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's /// instead of register numbers for the leaves of the matched tree. @@ -644,7 +639,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { Dividend = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, SDValue(Tmp, 0), SDValue(Dividend, 0), - CurDAG->getTargetConstant(subreg_odd, MVT::i32)); + CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32)); SDNode *Result; SDValue DivVal = SDValue(Dividend, 0); @@ -660,7 +655,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the division (odd subreg) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); + unsigned SubRegIdx = (is32Bit ? + SystemZ::subreg_odd32 : SystemZ::subreg_odd); SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -673,7 +669,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); + unsigned SubRegIdx = (is32Bit ? 
+ SystemZ::subreg_even32 : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -718,7 +715,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResVT); { - unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); + unsigned SubRegIdx = (is32Bit ? + SystemZ::subreg_odd32 : SystemZ::subreg_odd); Dividend = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, SDValue(Tmp, 0), SDValue(Dividend, 0), @@ -742,7 +740,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the division (odd subreg) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); + unsigned SubRegIdx = (is32Bit ? + SystemZ::subreg_odd32 : SystemZ::subreg_odd); SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -754,7 +753,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); + unsigned SubRegIdx = (is32Bit ? + SystemZ::subreg_even32 : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index e98f18b..76f2901 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -81,7 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : // LLVM's current latency-oriented scheduler can't handle physreg definitions // such as SystemZ has with PSW, so set this to the register-pressure // scheduler, because it can. - setSchedulingPreference(SchedulingForRegPressure); + setSchedulingPreference(Sched::RegPressure); setBooleanContents(ZeroOrOneBooleanContent); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index c92caa4..043686c 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -61,7 +61,8 @@ static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) { void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -90,7 +91,8 @@ void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const{ + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const{ DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -119,9 +121,8 @@ bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // Determine if DstRC and SrcRC have a common superclass. 
const TargetRegisterClass *CommonRC = DestRC; @@ -269,7 +270,8 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, bool SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -333,7 +335,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetRegisterClass *RegClass = CSI[i].getRegClass(); if (RegClass == &SystemZ::FP64RegClass) { MBB.addLiveIn(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass, + &RI); } } @@ -343,7 +346,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -359,7 +363,7 @@ SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RegClass = CSI[i].getRegClass(); if (RegClass == &SystemZ::FP64RegClass) - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI); } // Restore GP registers diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index ef3b39e..a753f14 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -63,7 +63,8 @@ public: bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, @@ -75,18 +76,22 @@ public: MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 99e396a..42aa5dd 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -1,4 +1,4 @@ -//===- SystemZRegisterInfo.h - SystemZ Register Information Impl ----*- C++ -*-===// +//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===// // // 
The LLVM Compiler Infrastructure // @@ -19,15 +19,6 @@ namespace llvm { -namespace SystemZ { - /// SubregIndex - The index of various sized subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// SystemZRegisterInfo.td file. - enum SubregIndex { - SUBREG_32BIT = 1, SUBREG_EVEN = 1, SUBREG_ODD = 2 - }; -} - class SystemZSubtarget; class SystemZInstrInfo; class Type; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 8795847..b561744 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -53,6 +53,14 @@ class FPRL<bits<4> num, string n, list<Register> subregs> field bits<4> Num = num; } +let Namespace = "SystemZ" in { +def subreg_32bit : SubRegIndex; +def subreg_even32 : SubRegIndex; +def subreg_odd32 : SubRegIndex; +def subreg_even : SubRegIndex; +def subreg_odd : SubRegIndex; +} + // General-purpose registers def R0W : GPR32< 0, "r0">, DwarfRegNum<[0]>; def R1W : GPR32< 1, "r1">, DwarfRegNum<[1]>; @@ -71,6 +79,7 @@ def R13W : GPR32<13, "r13">, DwarfRegNum<[13]>; def R14W : GPR32<14, "r14">, DwarfRegNum<[14]>; def R15W : GPR32<15, "r15">, DwarfRegNum<[15]>; +let SubRegIndices = [subreg_32bit] in { def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>; def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>; def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>; @@ -87,8 +96,10 @@ def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>; def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>; def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>; def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>; +} // Register pairs +let SubRegIndices = [subreg_even32, subreg_odd32] in { def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>; def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>; def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>; @@ -97,7 +108,11 @@ def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>, DwarfRegNum<[8]>; def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>, DwarfRegNum<[10]>; def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>, DwarfRegNum<[12]>; def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>; +} +let SubRegIndices = [subreg_even, subreg_odd], + CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit), + (subreg_odd32 subreg_odd, subreg_32bit)] in { def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>; def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>; def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>; @@ -106,6 +121,7 @@ def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>, DwarfRegNum<[8]>; def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>, DwarfRegNum<[10]>; def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>, DwarfRegNum<[12]>; def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>, DwarfRegNum<[14]>; +} // Floating-point registers def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>; @@ -125,6 +141,7 @@ def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>; def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>; def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>; +let SubRegIndices = [subreg_32bit] in { def F0L : FPRL< 0, "f0", [F0S]>, DwarfRegNum<[16]>; def F1L : FPRL< 1, "f1", [F1S]>, DwarfRegNum<[17]>; def F2L : FPRL< 2, "f2", [F2S]>, DwarfRegNum<[18]>; @@ -141,39 +158,11 @@ def F12L : FPRL<12, "f12", [F12S]>, DwarfRegNum<[28]>; def F13L : FPRL<13, "f13", [F13S]>, DwarfRegNum<[29]>; def F14L : FPRL<14, "f14", [F14S]>, DwarfRegNum<[30]>; def 
F15L : FPRL<15, "f15", [F15S]>, DwarfRegNum<[31]>; +} // Status register def PSW : SystemZReg<"psw">; -def subreg_32bit : PatLeaf<(i32 1)>; -def subreg_even32 : PatLeaf<(i32 1)>; -def subreg_odd32 : PatLeaf<(i32 2)>; -def subreg_even : PatLeaf<(i32 3)>; -def subreg_odd : PatLeaf<(i32 4)>; - -def : SubRegSet<1, [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], - [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<3, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], - [R0D, R2D, R4D, R6D, R8D, R10D, R12D, R14D]>; - -def : SubRegSet<4, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], - [R1D, R3D, R5D, R7D, R9D, R11D, R13D, R15D]>; - -def : SubRegSet<1, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P], - [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>; - -def : SubRegSet<2, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P], - [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>; - -def : SubRegSet<1, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], - [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>; - -def : SubRegSet<2, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], - [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>; - /// Register classes def GR32 : RegisterClass<"SystemZ", [i32], 32, // Volatile registers @@ -276,7 +265,7 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64, // Volatile, but not allocable R14D, R15D]> { - let SubRegClassList = [GR32]; + let SubRegClasses = [(GR32 subreg_32bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -323,7 +312,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, // Volatile, but not allocable R14D, R15D]> { - let SubRegClassList = [ADDR32]; + let SubRegClasses = [(ADDR32 subreg_32bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -366,7 +355,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, def GR64P : RegisterClass<"SystemZ", [v2i32], 64, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]> { - let SubRegClassList = [GR32, GR32]; + let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -402,7 +391,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64, def GR128 : RegisterClass<"SystemZ", [v2i64], 128, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> { - let SubRegClassList = [GR32, GR32, GR64, GR64]; + let SubRegClasses = [(GR32 subreg_even32, subreg_odd32), + (GR64 subreg_even, subreg_odd)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -462,7 +452,7 @@ def FP32 : RegisterClass<"SystemZ", [f32], 32, def FP64 : RegisterClass<"SystemZ", [f64], 64, [F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L, F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> { - let SubRegClassList = [FP32]; + let SubRegClasses = [(FP32 subreg_32bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 87c831b..3eabcd2 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ 
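(An aside on the SystemZRegisterInfo.td hunk above: the hand-numbered SubregIndex enum and the SubRegSet tables are gone, replaced by named SubRegIndex definitions plus SubRegIndices/CompositeIndices on the register defs, from which TableGen derives the sub-register graph. A minimal sketch of how target code queries sub-registers under this scheme; the SystemZ::subreg_* enumerator names are assumed here to be generated from the defs above, since the diff does not show the generated file:

    // Fragment as it might appear inside a machine pass; not standalone.
    const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo();
    unsigned Low  = TRI.getSubReg(SystemZ::R5D, SystemZ::subreg_32bit); // R5W
    unsigned Odd  = TRI.getSubReg(SystemZ::R4Q, SystemZ::subreg_odd);   // R5D
    // CompositeIndices defines subreg_odd32 as subreg_odd followed by
    // subreg_32bit, so the composed query resolves in one step:
    unsigned OddW = TRI.getSubReg(SystemZ::R4Q, SystemZ::subreg_odd32); // R5W
)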
//===----------------------------------------------------------------------===// #define DEBUG_TYPE "systemz-selectiondag-info" -#include "SystemZSelectionDAGInfo.h" +#include "SystemZTargetMachine.h" using namespace llvm; -SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() { +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 5292de9..1450401 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class SystemZTargetMachine; + class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SystemZSelectionDAGInfo(); + explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM); ~SystemZSelectionDAGInfo(); }; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index dfa26a1..f45827b 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -29,7 +29,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, Subtarget(TT, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" "-f64:64:64-f128:128:128-a0:16:16-n32:64"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) { if (getRelocationModel() == Reloc::Default) diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index d3357cc..6af829b 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -17,6 +17,7 @@ #include "SystemZInstrInfo.h" #include "SystemZISelLowering.h" +#include "SystemZSelectionDAGInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSubtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ class SystemZTargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment SystemZInstrInfo InstrInfo; SystemZTargetLowering TLInfo; + SystemZSelectionDAGInfo TSInfo; // SystemZ does not have any call stack frame, therefore not having // any SystemZ specific FrameInfo class. @@ -53,6 +55,10 @@ public: return &TLInfo; } + virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; // SystemZTargetMachine. diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index ac67c91..df52368 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -212,7 +212,8 @@ FunctionSections("ffunction-sections", // TargetMachine::TargetMachine(const Target &T) - : TheTarget(T), AsmInfo(0) { + : TheTarget(T), AsmInfo(0), + MCRelaxAll(false) { // Typically it will be subtargets that will adjust FloatABIType from Default // to Soft or Hard. if (UseSoftFloat) @@ -273,13 +274,14 @@ namespace llvm { /// DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. bool DisableFramePointerElim(const MachineFunction &MF) { - if (NoFramePointerElim) - return true; - if (NoFramePointerElimNonLeaf) { + // Check to see if we should eliminate non-leaf frame pointers and then + // check to see if we should eliminate all frame pointers. 
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->hasCalls(); } - return false; + + return NoFramePointerElim; } /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 52983ff..dcc5f61 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -22,14 +22,14 @@ using namespace llvm; TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, regclass_iterator RCB, regclass_iterator RCE, + const char *const *subregindexnames, int CFSO, int CFDO, const unsigned* subregs, const unsigned subregsize, - const unsigned* superregs, const unsigned superregsize, const unsigned* aliases, const unsigned aliasessize) : SubregHash(subregs), SubregHashSize(subregsize), - SuperregHash(superregs), SuperregHashSize(superregsize), AliasesHash(aliases), AliasesHashSize(aliasessize), - Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) { + Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR), + RegClassBegin(RCB), RegClassEnd(RCE) { assert(NumRegs < FirstVirtualRegister && "Target has too many physical registers!"); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6b403c1..40a6a7b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -186,20 +186,73 @@ struct X86Operand : public MCParsedAsmOperand { bool isImm() const { return Kind == Immediate; } - bool isImmSExt8() const { - // Accept immediates which fit in 8 bits when sign extended, and - // non-absolute immediates. + bool isImmSExti16i8() const { if (!isImm()) return false; - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { - int64_t Value = CE->getValue(); - return Value == (int64_t) (int8_t) Value; - } + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; - return true; + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } - + bool isImmSExti32i8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isImmSExti64i8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. 
+ uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isImmSExti64i32() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000007FFFFFFFULL)|| + (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isMem() const { return Kind == Memory; } bool isAbsMem() const { @@ -231,12 +284,6 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } - void addImmSExt8Operands(MCInst &Inst, unsigned N) const { - // FIXME: Support user customization of the render method. - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addMemOperands(MCInst &Inst, unsigned N) const { assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); @@ -535,6 +582,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // The various flavors of pushf and popf use Requires<In32BitMode> and + // Requires<In64BitMode>, but the assembler doesn't yet implement that. + // For now, just do a manual check to prevent silent misencoding. + if (Is64Bit) { + if (Name == "popfl") + return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); + else if (Name == "pushfl") + return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); + } else { + if (Name == "popfq") + return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); + else if (Name == "pushfq") + return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); + } + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to // represent alternative syntaxes in the .td file, without requiring // instruction duplication. @@ -547,9 +609,66 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, .Case("repe", "rep") .Case("repz", "rep") .Case("repnz", "repne") + .Case("pushf", Is64Bit ? "pushfq" : "pushfl") + .Case("popf", Is64Bit ? "popfq" : "popfl") + .Case("retl", Is64Bit ? "retl" : "ret") + .Case("retq", Is64Bit ? "ret" : "retq") + .Case("setz", "sete") + .Case("setnz", "setne") + .Case("jz", "je") + .Case("jnz", "jne") + .Case("cmovcl", "cmovbl") + .Case("cmovcl", "cmovbl") + .Case("cmovnal", "cmovbel") + .Case("cmovnbl", "cmovael") + .Case("cmovnbel", "cmoval") + .Case("cmovncl", "cmovael") + .Case("cmovngl", "cmovlel") + .Case("cmovnl", "cmovgel") + .Case("cmovngl", "cmovlel") + .Case("cmovngel", "cmovll") + .Case("cmovnll", "cmovgel") + .Case("cmovnlel", "cmovgl") + .Case("cmovnzl", "cmovnel") + .Case("cmovzl", "cmovel") .Default(Name); + + // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 
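(An aside on the isImmSExti16i8/i32i8/i64i8/i64i32 predicates earlier in this hunk: all four accept a value when it survives the truncate-then-sign-extend round trip for the given field and operation widths, and they deliberately accept any non-constant expression so relaxation can sort it out later. A standalone restatement of the i32i8 case with the same constants, as a compilable sketch:

    #include <cassert>
    #include <cstdint>

    // True when V is usable as an 8-bit immediate of a 32-bit operation:
    // small positives, 32-bit-truncated negatives, or 64-bit sign-extended
    // negatives (the third range's upper bound in the hunk is trivially true).
    static bool fitsSExti32i8(uint64_t V) {
      return V <= 0x000000000000007FULL ||
             (0x00000000FFFFFF80ULL <= V && V <= 0x00000000FFFFFFFFULL) ||
             0xFFFFFFFFFFFFFF80ULL <= V;
    }

    int main() {
      assert(fitsSExti32i8(127));            // largest positive imm8
      assert(fitsSExti32i8(0xFFFFFF80ULL));  // -128 written as a 32-bit value
      assert(fitsSExti32i8(~0ULL));          // -1 written as a 64-bit value
      assert(!fitsSExti32i8(128));           // needs the full imm32 form
      return 0;
    }
)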
+ const MCExpr *ExtraImmOp = 0; + if (PatchedName.startswith("cmp") && + (PatchedName.endswith("ss") || PatchedName.endswith("sd") || + PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { + unsigned SSEComparisonCode = StringSwitch<unsigned>( + PatchedName.slice(3, PatchedName.size() - 2)) + .Case("eq", 0) + .Case("lt", 1) + .Case("le", 2) + .Case("unord", 3) + .Case("neq", 4) + .Case("nlt", 5) + .Case("nle", 6) + .Case("ord", 7) + .Default(~0U); + if (SSEComparisonCode != ~0U) { + ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, + getParser().getContext()); + if (PatchedName.endswith("ss")) { + PatchedName = "cmpss"; + } else if (PatchedName.endswith("sd")) { + PatchedName = "cmpsd"; + } else if (PatchedName.endswith("ps")) { + PatchedName = "cmpps"; + } else { + assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); + PatchedName = "cmppd"; + } + } + } Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + if (ExtraImmOp) + Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); + if (getLexer().isNot(AsmToken::EndOfStatement)) { // Parse '*' modifier. @@ -564,7 +683,7 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.push_back(Op); else return true; - + while (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. @@ -587,6 +706,17 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 1); } + // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as + // "f{mul*,add*,sub*,div*} $op" + if ((Name.startswith("fmul") || Name.startswith("fadd") || + Name.startswith("fsub") || Name.startswith("fdiv")) && + Operands.size() == 3 && + static_cast<X86Operand*>(Operands[2])->isReg() && + static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) { + delete Operands[2]; + Operands.erase(Operands.begin() + 2); + } + return false; } @@ -622,6 +752,31 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has an +/// imm operand, to having "rm" or "mr" operands with the offset in the disp +/// field. +static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, + bool isMR) { + MCOperand Disp = Inst.getOperand(0); + + // Start over with an empty instruction. + Inst = MCInst(); + Inst.setOpcode(Opc); + + if (!isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); + + // Add the mem operand. + Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg + Inst.addOperand(MCOperand::CreateImm(1)); // Scale + Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg + Inst.addOperand(Disp); // Displacement + Inst.addOperand(MCOperand::CreateReg(0)); // Segment + + if (isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); +} + // FIXME: Custom X86 cleanup function to implement a temporary hack to handle // matching INCL/DECL correctly for x86_64. This needs to be replaced by a // proper mechanism for supporting (ambiguous) feature dependent instructions. @@ -637,6 +792,14 @@ void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; + + // moffset instructions are x86-32 only.
+ case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; + case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; + case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; + case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; + case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; + case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; } } @@ -673,6 +836,8 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> bool MatchW = MatchInstructionImpl(Operands, Inst); Tmp[Base.size()] = 'l'; bool MatchL = MatchInstructionImpl(Operands, Inst); + Tmp[Base.size()] = 'q'; + bool MatchQ = MatchInstructionImpl(Operands, Inst); // Restore the old token. Op->setTokenValue(Base); @@ -680,7 +845,7 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> // If exactly one matched, then we treat that as a successful match (and the // instruction will already have been filled in correctly, since the failing // matches won't have modified it). - if (MatchB + MatchW + MatchL == 2) + if (MatchB + MatchW + MatchL + MatchQ == 3) return false; // Otherwise, the match failed. diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 8b0ed1c..183213d 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -58,12 +58,11 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); if (Subtarget->isTargetCOFF()) { - const Function *F = MF.getFunction(); - OutStreamer.EmitRawText("\t.def\t " + Twine(CurrentFnSym->getName()) + - ";\t.scl\t" + - Twine(F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) + - ";\t.type\t" + Twine(COFF::DT_FCN << COFF::N_BTSHFT) - + ";\t.endef"); + bool Intrn = MF.getFunction()->hasInternalLinkage(); + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT); + OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EndCOFFSymbolDef(); } // Have common code print out the function header with linkage info etc. 
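(A note on the MatchInstruction arithmetic above: MatchInstructionImpl returns true on failure, so with the q suffix added there are four candidates (b, w, l, q) and "exactly one matched" is equivalent to "exactly three failed", which is why the sum test moves from == 2 to == 3. In miniature:

    #include <cassert>

    int main() {
      // true means "failed to match", as with MatchInstructionImpl.
      bool MatchB = true, MatchW = true, MatchL = false, MatchQ = true;
      // Exactly one of four suffixes matched <=> exactly three failed.
      assert(MatchB + MatchW + MatchL + MatchQ == 3);
      return 0;
    }
)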
@@ -571,44 +570,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); // Emit type information for external functions - for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), - E = COFFMMI.stub_end(); I != E; ++I) { - OutStreamer.EmitRawText("\t.def\t " + Twine(I->getKeyData()) + - ";\t.scl\t" + Twine(COFF::C_EXT) + - ";\t.type\t" + - Twine(COFF::DT_FCN << COFF::N_BTSHFT) + - ";\t.endef"); + typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator; + for (externals_iterator I = COFFMMI.externals_begin(), + E = COFFMMI.externals_end(); + I != E; ++I) { + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT); + OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EndCOFFSymbolDef(); } - if (Subtarget->isTargetCygMing()) { - // Necessary for dllexport support - std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; - - const TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); - - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedFns.push_back(Mang->getSymbol(I)); - - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedGlobals.push_back(Mang->getSymbol(I)); - - // Output linker support code for dllexported globals on windows. - if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { - OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve", - true, - SectionKind::getMetadata())); - for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) - OutStreamer.EmitRawText("\t.ascii \" -export:" + - Twine(DLLExportedGlobals[i]->getName()) + - ",data\""); - - for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) - OutStreamer.EmitRawText("\t.ascii \" -export:" + - Twine(DLLExportedFns[i]->getName()) + "\""); + // Necessary for dllexport support + std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; + + const TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedFns.push_back(Mang->getSymbol(I)); + + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedGlobals.push_back(Mang->getSymbol(I)); + + // Output linker support code for dllexported globals on windows. 
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { + OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection()); + SmallString<128> name; + for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) { + if (Subtarget->isTargetWindows()) + name = " /EXPORT:"; + else + name = " -export:"; + name += DLLExportedGlobals[i]->getName(); + if (Subtarget->isTargetWindows()) + name += ",DATA"; + else + name += ",data"; + OutStreamer.EmitBytes(name, 0); + } + + for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) { + if (Subtarget->isTargetWindows()) + name = " /EXPORT:"; + else + name = " -export:"; + name += DLLExportedFns[i]->getName(); + OutStreamer.EmitBytes(name, 0); } } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 95984b2..b5a7f8d 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -31,7 +31,7 @@ class MCInst; class MCStreamer; class MCSymbol; -class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { +class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; public: explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index effc8ed..4edeca9 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -224,6 +224,60 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) { OutMI.addOperand(OutMI.getOperand(0)); } +/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with +/// a short fixed-register form. +static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { + unsigned ImmOp = Inst.getNumOperands() - 1; + assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() && + ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) || + Inst.getNumOperands() == 2) && "Unexpected instruction!"); + + // Check whether the destination register can be fixed. + unsigned Reg = Inst.getOperand(0).getReg(); + if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX) + return; + + // If so, rewrite the instruction. + MCOperand Saved = Inst.getOperand(ImmOp); + Inst = MCInst(); + Inst.setOpcode(Opcode); + Inst.addOperand(Saved); +} + +/// \brief Simplify things like MOV32rm to MOV32o32a. +static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) { + bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg(); + unsigned AddrBase = IsStore; + unsigned RegOp = IsStore ? 0 : 5; + unsigned AddrOp = AddrBase + 3; + assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() && + Inst.getOperand(AddrBase + 0).isReg() && // base + Inst.getOperand(AddrBase + 1).isImm() && // scale + Inst.getOperand(AddrBase + 2).isReg() && // index register + (Inst.getOperand(AddrOp).isExpr() || // address + Inst.getOperand(AddrOp).isImm())&& + Inst.getOperand(AddrBase + 4).isReg() && // segment + "Unexpected instruction!"); + + // Check whether the destination register can be fixed. + unsigned Reg = Inst.getOperand(RegOp).getReg(); + if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX) + return; + + // Check whether this is an absolute address. 
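(An aside on the dllexport hunk above: the directives written into the .drectve section come in two dialects, " -export:sym[,data]" for the GNU-flavored CygMing linker and " /EXPORT:sym[,DATA]" for Microsoft link.exe, selected by Subtarget->isTargetWindows(). A standalone sketch of the string being assembled, where IsWindows stands in for that subtarget query:

    #include <iostream>
    #include <string>

    // Build one .drectve entry for an exported symbol. The leading space
    // separates entries when the section contents are concatenated.
    static std::string exportDirective(const std::string &Sym, bool IsData,
                                       bool IsWindows) {
      std::string S = IsWindows ? " /EXPORT:" : " -export:";
      S += Sym;
      if (IsData)
        S += IsWindows ? ",DATA" : ",data";
      return S;
    }

    int main() {
      std::cout << exportDirective("_glob", true, true) << "\n";   //  /EXPORT:_glob,DATA
      std::cout << exportDirective("_func", false, false) << "\n"; //  -export:_func
      return 0;
    }
)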
+ if (Inst.getOperand(AddrBase + 0).getReg() != 0 || + Inst.getOperand(AddrBase + 2).getReg() != 0 || + Inst.getOperand(AddrBase + 4).getReg() != 0 || + Inst.getOperand(AddrBase + 1).getImm() != 1) + return; + + // If so, rewrite the instruction. + MCOperand Saved = Inst.getOperand(AddrOp); + Inst = MCInst(); + Inst.setOpcode(Opcode); + Inst.addOperand(Saved); +} void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); @@ -309,8 +363,32 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0 LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; - - + + // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have + // register inputs modeled as normal uses instead of implicit uses. As such, + // truncate off all but the first operand (the callee). FIXME: Change isel. + case X86::TAILJMPr: + case X86::TAILJMPr64: + case X86::CALL64r: + case X86::CALL64pcrel32: { + unsigned Opcode = OutMI.getOpcode(); + MCOperand Saved = OutMI.getOperand(0); + OutMI = MCInst(); + OutMI.setOpcode(Opcode); + OutMI.addOperand(Saved); + break; + } + + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. + case X86::TAILJMPd: + case X86::TAILJMPd64: { + MCOperand Saved = OutMI.getOperand(0); + OutMI = MCInst(); + OutMI.setOpcode(X86::TAILJMP_1); + OutMI.addOperand(Saved); + break; + } + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. For now, translate the large form to the @@ -332,6 +410,61 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break; case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break; case X86::JG_4: OutMI.setOpcode(X86::JG_1); break; + + // We don't currently select the correct instruction form for instructions + // which have a short %eax, etc. form. Handle this by custom lowering, for + // now. 
+ // + // Note, we are currently not handling the following instructions: + // MOV64ao8, MOV64o8a + // XCHG16ar, XCHG32ar, XCHG64ar + case X86::MOV8mr_NOREX: + case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break; + case X86::MOV8rm_NOREX: + case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break; + case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break; + case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break; + case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break; + case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break; + case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break; + case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break; + + case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break; + case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break; + case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break; + case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break; + case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break; + case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break; + case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break; + case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break; + case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break; + case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break; + case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break; + case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break; + case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break; + case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break; + case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break; + case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break; + case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break; + case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break; + case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break; + case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break; + case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break; + case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break; + case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break; + case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break; + case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break; + case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break; + case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break; + case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break; + case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break; + case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break; + case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break; + case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break; + case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break; + case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break; + case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break; + case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break; } } @@ -346,7 +479,7 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, // cast away const; DIetc do not take const operands for some reason. 
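(Stepping back to the SimplifyShortImmForm case table above: x86 has special short encodings when the destination is the accumulator, so rewriting, say, ADD32ri on EAX into ADD32i32 drops the ModRM byte (addl $1, %eax encodes as 05 01 00 00 00, five bytes, versus 81 C0 01 00 00 00, six). The rewrite only fires for AL/AX/EAX/RAX; a standalone restatement of that guard, with made-up register constants in place of the X86:: enumerators:

    #include <cassert>

    enum Reg { AL, AX, EAX, RAX, ECX };

    // Mirror of the accumulator test in SimplifyShortImmForm.
    static bool hasShortImmForm(Reg R) {
      return R == AL || R == AX || R == EAX || R == RAX;
    }

    int main() {
      assert(hasShortImmForm(EAX));  // addl $1, %eax -> 05 01 00 00 00
      assert(!hasShortImmForm(ECX)); // addl $1, %ecx -> 81 C1 01 00 00 00
      return 0;
    }
)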
DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); if (V.getContext().isSubprogram()) - O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; + O << DISubprogram(V.getContext()).getDisplayName() << ":"; O << V.getName(); O << " <- "; // Frame address. Currently handles register +- offset only. diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h index ebd23f6..9e5474f 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -25,7 +25,7 @@ namespace llvm { class X86Subtarget; /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. -class VISIBILITY_HIDDEN X86MCInstLower { +class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; Mangler *Mang; X86AsmPrinter &AsmPrinter; diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index da6bb91..1334820 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -39,7 +39,12 @@ set(sources if( CMAKE_CL_64 ) enable_language(ASM_MASM) - set(sources ${sources} X86CompilationCallback_Win64.asm) + ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj + COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm + ) + set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) endif() add_llvm_target(X86CodeGen ${sources}) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 62e7357..8a5a630 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -155,7 +155,57 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// /// @param mcInst - The MCInst to append to. /// @param immediate - The immediate value to append. -static void translateImmediate(MCInst &mcInst, uint64_t immediate) { +/// @param operand - The operand, as stored in the descriptor table. +/// @param insn - The internal instruction. +static void translateImmediate(MCInst &mcInst, + uint64_t immediate, + OperandSpecifier &operand, + InternalInstruction &insn) { + // Sign-extend the immediate if necessary. + + OperandType type = operand.type; + + if (type == TYPE_RELv) { + switch (insn.displacementSize) { + default: + break; + case 8: + type = TYPE_MOFFS8; + break; + case 16: + type = TYPE_MOFFS16; + break; + case 32: + type = TYPE_MOFFS32; + break; + case 64: + type = TYPE_MOFFS64; + break; + } + } + + switch (type) { + case TYPE_MOFFS8: + case TYPE_REL8: + if(immediate & 0x80) + immediate |= ~(0xffull); + break; + case TYPE_MOFFS16: + if(immediate & 0x8000) + immediate |= ~(0xffffull); + break; + case TYPE_MOFFS32: + case TYPE_REL32: + case TYPE_REL64: + if(immediate & 0x80000000) + immediate |= ~(0xffffffffull); + break; + case TYPE_MOFFS64: + default: + // operand is 64 bits wide. Do nothing. 
+ break; + } + mcInst.addOperand(MCOperand::CreateImm(immediate)); } @@ -370,8 +420,7 @@ static bool translateRM(MCInst &mcInst, case TYPE_XMM64: case TYPE_XMM128: case TYPE_DEBUGREG: - case TYPE_CR32: - case TYPE_CR64: + case TYPE_CONTROLREG: return translateRMRegister(mcInst, insn); case TYPE_M: case TYPE_M8: @@ -447,8 +496,10 @@ static bool translateOperand(MCInst &mcInst, case ENCODING_IO: case ENCODING_Iv: case ENCODING_Ia: - translateImmediate(mcInst, - insn.immediates[insn.numImmediatesTranslated++]); + translateImmediate(mcInst, + insn.immediates[insn.numImmediatesTranslated++], + operand, + insn); return false; case ENCODING_RB: case ENCODING_RW: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 64f6b2d..6c3ff6b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1034,14 +1034,10 @@ static int readModRM(struct InternalInstruction* insn) { if (index > 7) \ *valid = 0; \ return prefix##_DR0 + index; \ - case TYPE_CR32: \ - if (index > 7) \ - *valid = 0; \ - return prefix##_ECR0 + index; \ - case TYPE_CR64: \ + case TYPE_CONTROLREG: \ if (index > 8) \ *valid = 0; \ - return prefix##_RCR0 + index; \ + return prefix##_CR0 + index; \ } \ } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 462cf68..28ba86b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -225,26 +225,16 @@ extern "C" { ENTRY(DR6) \ ENTRY(DR7) -#define REGS_CONTROL_32BIT \ - ENTRY(ECR0) \ - ENTRY(ECR1) \ - ENTRY(ECR2) \ - ENTRY(ECR3) \ - ENTRY(ECR4) \ - ENTRY(ECR5) \ - ENTRY(ECR6) \ - ENTRY(ECR7) - -#define REGS_CONTROL_64BIT \ - ENTRY(RCR0) \ - ENTRY(RCR1) \ - ENTRY(RCR2) \ - ENTRY(RCR3) \ - ENTRY(RCR4) \ - ENTRY(RCR5) \ - ENTRY(RCR6) \ - ENTRY(RCR7) \ - ENTRY(RCR8) +#define REGS_CONTROL \ + ENTRY(CR0) \ + ENTRY(CR1) \ + ENTRY(CR2) \ + ENTRY(CR3) \ + ENTRY(CR4) \ + ENTRY(CR5) \ + ENTRY(CR6) \ + ENTRY(CR7) \ + ENTRY(CR8) #define ALL_EA_BASES \ EA_BASES_16BIT \ @@ -264,8 +254,7 @@ extern "C" { REGS_XMM \ REGS_SEGMENT \ REGS_DEBUG \ - REGS_CONTROL_32BIT \ - REGS_CONTROL_64BIT \ + REGS_CONTROL \ ENTRY(RIP) /* diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 4a7cd57..0f33f52 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -280,8 +280,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ - ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \ - ENUM_ENTRY(TYPE_CR64, "8-byte") \ + ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ \ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \ diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index 5e80845..dab070e 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -155,9 +155,7 @@ char SSEDomainFixPass::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. int SSEDomainFixPass::RegIndex(unsigned reg) { - // Registers are sorted lexicographically. 
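(A note on translateImmediate above: once the operand type is known, the decoded field is widened to a signed 64-bit immediate by OR-ing in high bits whenever the field's sign bit is set. The same operation as a standalone helper:

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low 'bits' bits of x to 64 bits, mask style, as the
    // disassembler hunk does with its explicit 0xff.../0x80... constants.
    static uint64_t signExtend(uint64_t x, unsigned bits) {
      uint64_t sign = 1ULL << (bits - 1);
      if (x & sign)
        x |= ~((sign << 1) - 1);  // e.g. bits == 8: x |= ~0xffull
      return x;
    }

    int main() {
      assert(signExtend(0x80, 8)    == 0xFFFFFFFFFFFFFF80ULL);
      assert(signExtend(0x7F, 8)    == 0x7FULL);
      assert(signExtend(0x8000, 16) == 0xFFFFFFFFFFFF8000ULL);
      return 0;
    }
)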
- // We just need them to be consecutive, ordering doesn't matter. - assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); + assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); reg -= X86::XMM0; return reg < NumRegs ? (int) reg : -1; } diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index ba9c1d0..151087f 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -12,6 +12,7 @@ #include "X86FixupKinds.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" @@ -43,20 +44,19 @@ public: X86AsmBackend(const Target &T) : TargetAsmBackend(T) {} - void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF, + void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF, uint64_t Value) const { - unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.Offset + Size <= DF.getContents().size() && + assert(Fixup.getOffset() + Size <= DF.getContents().size() && "Invalid fixup offset!"); for (unsigned i = 0; i != Size; ++i) - DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8)); + DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); } - bool MayNeedRelaxation(const MCInst &Inst, - const SmallVectorImpl<MCAsmFixup> &Fixups) const; + bool MayNeedRelaxation(const MCInst &Inst) const; - void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; @@ -75,6 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) { case X86::JG_1: return X86::JG_4; case X86::JLE_1: return X86::JLE_4; case X86::JL_1: return X86::JL_4; + case X86::TAILJMP_1: case X86::JMP_1: return X86::JMP_4; case X86::JNE_1: return X86::JNE_4; case X86::JNO_1: return X86::JNO_4; @@ -86,35 +87,33 @@ static unsigned getRelaxedOpcode(unsigned Op) { } } -bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst, - const SmallVectorImpl<MCAsmFixup> &Fixups) const { - // Check for a 1byte pcrel fixup, and enforce that we would know how to relax - // this instruction. - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) { - assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()); - return true; - } - } +bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { + // Check if this instruction is ever relaxable. + if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode()) + return false; - return false; + // If so, just assume it can be relaxed. Once we support relaxing more complex + // instructions we should check that the instruction actually has symbolic + // operands before doing this, but we need to be careful about things like + // PCrel. + return true; } // FIXME: Can tblgen help at all here to verify there aren't other instructions // we can relax? -void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF, - MCInst &Res) const { +void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. 
- unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode()); + unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); - if (RelaxedOp == IF->getInst().getOpcode()) { + if (RelaxedOp == Inst.getOpcode()) { SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - IF->getInst().dump_pretty(OS); + Inst.dump_pretty(OS); + OS << "\n"; report_fatal_error("unexpected instruction to relax: " + OS.str()); } - Res = IF->getInst(); + Res = Inst; Res.setOpcode(RelaxedOp); } @@ -199,6 +198,18 @@ public: } }; +class ELFX86_32AsmBackend : public ELFX86AsmBackend { +public: + ELFX86_32AsmBackend(const Target &T) + : ELFX86AsmBackend(T) {} +}; + +class ELFX86_64AsmBackend : public ELFX86AsmBackend { +public: + ELFX86_64AsmBackend(const Target &T) + : ELFX86AsmBackend(T) {} +}; + class DarwinX86AsmBackend : public X86AsmBackend { public: DarwinX86AsmBackend(const Target &T) @@ -210,7 +221,8 @@ public: bool isVirtualSection(const MCSection &Section) const { const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); return (SMO.getType() == MCSectionMachO::S_ZEROFILL || - SMO.getType() == MCSectionMachO::S_GB_ZEROFILL); + SMO.getType() == MCSectionMachO::S_GB_ZEROFILL || + SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL); } }; @@ -247,6 +259,26 @@ public: const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS; } + + virtual bool isSectionAtomizable(const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); + // Fixed sized data sections are uniqued, they cannot be diced into atoms. + switch (SMO.getType()) { + default: + return true; + + case MCSectionMachO::S_4BYTE_LITERALS: + case MCSectionMachO::S_8BYTE_LITERALS: + case MCSectionMachO::S_16BYTE_LITERALS: + case MCSectionMachO::S_LITERAL_POINTERS: + case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS: + case MCSectionMachO::S_LAZY_SYMBOL_POINTERS: + case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS: + case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS: + case MCSectionMachO::S_INTERPOSING: + return false; + } + } }; } @@ -257,7 +289,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_32AsmBackend(T); default: - return new ELFX86AsmBackend(T); + return new ELFX86_32AsmBackend(T); } } @@ -267,6 +299,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_64AsmBackend(T); default: - return new ELFX86AsmBackend(T); + return new ELFX86_64AsmBackend(T); } } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index eece462..98ab2a6 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -15,7 +15,7 @@ #define X86COFF_MACHINEMODULEINFO_H #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/DenseSet.h" #include "X86MachineFunctionInfo.h" namespace llvm { @@ -25,18 +25,18 @@ namespace llvm { /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. 
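(An aside on the relaxation rework above: the fixup scan is gone, and the new invariant is that an instruction may need relaxation exactly when getRelaxedOpcode maps it to a different opcode, while RelaxInstruction treats a fixed point of that mapping as a fatal error. A standalone restatement with a two-entry stand-in for the opcode table:

    #include <cassert>

    enum Opcode { JMP_1, JMP_4, NOP };

    // Stand-in for getRelaxedOpcode: identity for anything without a
    // larger form.
    static Opcode relaxedOpcode(Opcode Op) {
      return Op == JMP_1 ? JMP_4 : Op;
    }

    static bool mayNeedRelaxation(Opcode Op) {
      return relaxedOpcode(Op) != Op;  // relaxable iff the mapping moves it
    }

    int main() {
      assert(mayNeedRelaxation(JMP_1));
      assert(!mayNeedRelaxation(JMP_4)); // already the large form
      assert(!mayNeedRelaxation(NOP));
      return 0;
    }
)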
class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { - StringSet<> CygMingStubs; + DenseSet<MCSymbol const *> Externals; public: X86COFFMachineModuleInfo(const MachineModuleInfo &) {} virtual ~X86COFFMachineModuleInfo(); - void addExternalFunction(StringRef Name) { - CygMingStubs.insert(Name); + void addExternalFunction(MCSymbol* Symbol) { + Externals.insert(Symbol); } - typedef StringSet<>::const_iterator stub_iterator; - stub_iterator stub_begin() const { return CygMingStubs.begin(); } - stub_iterator stub_end() const { return CygMingStubs.end(); } + typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator; + externals_iterator externals_begin() const { return Externals.begin(); } + externals_iterator externals_end() const { return Externals.end(); } }; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index fd15efd..a5774e1 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -307,6 +307,20 @@ def CC_X86_32_FastCall : CallingConv<[ CCDelegateTo<CC_X86_32_Common> ]>; +def CC_X86_32_ThisCall : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // The 'nest' parameter, if any, is passed in EAX. + CCIfNest<CCAssignToReg<[EAX]>>, + + // The first integer argument is passed in ECX + CCIfType<[i32], CCAssignToReg<[ECX]>>, + + // Otherwise, same as everything else. + CCDelegateTo<CC_X86_32_Common> +]>; + def CC_X86_32_FastCC : CallingConv<[ // Handles byval parameters. Note that we can't rely on the delegation // to CC_X86_32_Common for this because that happens after code that diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ff9208c..1bc5eb7 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -180,6 +180,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; + else if (CC == CallingConv::X86_ThisCall) + return CC_X86_32_ThisCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else if (CC == CallingConv::GHC) @@ -324,7 +326,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg) { - unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src); + unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, + Src, /*TODO: Kill=*/false); if (RR != 0) { ResultReg = RR; @@ -416,7 +419,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { (S == 1 || S == 2 || S == 4 || S == 8)) { // Scaled-index addressing. Scale = S; - IndexReg = getRegForGEPIndex(Op); + IndexReg = getRegForGEPIndex(Op).first; if (IndexReg == 0) return false; } else @@ -802,7 +805,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. 
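(An aside on CC_X86_32_ThisCall above: this is the MSVC thiscall convention, where on x86-32 the first integer argument, normally the this pointer, arrives in ECX, the remaining arguments go on the stack, and per the IsCalleePop change later in this diff the callee pops them. A small illustration using the GCC/Clang attribute spelling, which is an assumption here since the diff itself only wires up the calling-convention enum; it only has effect when compiled for 32-bit x86:

    // Build for i386; elsewhere the attribute is ignored with a warning.
    struct Counter { int n; };

    static int __attribute__((thiscall)) bump(Counter *self, int by) {
      return self->n += by;  // 'self' arrives in ECX, 'by' on the stack
    }

    int main() {
      Counter c = {41};
      return bump(&c, 1) == 42 ? 0 : 1;
    }
)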
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg); + ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); if (ResultReg == 0) return false; UpdateValueMap(I, ResultReg); return true; @@ -913,7 +916,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) { const MachineInstr &MI = *RI; - if (MI.modifiesRegister(Reg)) { + if (MI.definesRegister(Reg)) { unsigned Src, Dst, SrcSR, DstSR; if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) { @@ -1019,14 +1022,14 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC); + TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL); // The shift instruction uses X86::CL. If we defined a super-register // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what // we're doing here. if (CReg != X86::CL) BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) - .addReg(CReg).addImm(X86::SUBREG_8BIT); + .addReg(CReg).addImm(X86::sub_8bit); unsigned ResultReg = createResultReg(RC); BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg); @@ -1133,7 +1136,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { // Then issue an extract_subreg. unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, - CopyReg, X86::SUBREG_8BIT); + CopyReg, /*Kill=*/true, + X86::sub_8bit); if (!ResultReg) return false; @@ -1436,7 +1440,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } case CCValAssign::BCvt: { unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), - ISD::BIT_CONVERT, Arg); + ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; ArgVT = VA.getLocVT(); @@ -1447,7 +1451,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (VA.isRegLoc()) { TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT); bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(), - Arg, RC, RC); + Arg, RC, RC, DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; RegArgs.push_back(VA.getLocReg()); @@ -1473,7 +1477,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (Subtarget->isPICStyleGOT()) { TargetRegisterClass *RC = X86::GR32RegisterClass; unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC); + bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC, + DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; } @@ -1552,7 +1557,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { unsigned ResultReg = createResultReg(DstRC); bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - RVLocs[0].getLocReg(), DstRC, SrcRC); + RVLocs[0].getLocReg(), DstRC, SrcRC, DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; if (CopyVT != RVLocs[0].getValVT()) { diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 541083f..747683d 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "x86-codegen" #include "X86.h" #include "X86InstrInfo.h" -#include "X86Subtarget.h" #include "llvm/Instructions.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include 
"llvm/CodeGen/MachineInstrBuilder.h" @@ -42,12 +41,70 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; } + virtual const char *getPassName() const { + return "X86 FP_REG_KILL inserter"; + } }; char FPRegKiller::ID = 0; } -FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); } +FunctionPass *llvm::createX87FPRegKillInserterPass() { + return new FPRegKiller(); +} + +/// isFPStackVReg - Return true if the specified vreg is from a fp stack +/// register class. +static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RegNo)) + return false; + + switch (MRI.getRegClass(RegNo)->getID()) { + default: return false; + case X86::RFP32RegClassID: + case X86::RFP64RegClassID: + case X86::RFP80RegClassID: + return true; + } +} + + +/// ContainsFPStackCode - Return true if the specific MBB has floating point +/// stack code, and thus needs an FP_REG_KILL. +static bool ContainsFPStackCode(MachineBasicBlock *MBB, + const MachineRegisterInfo &MRI) { + // Scan the block, looking for instructions that define fp stack vregs. + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->getNumOperands() == 0 || !I->getOperand(0).isReg()) + continue; + + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef()) + continue; + + if (isFPStackVReg(I->getOperand(op).getReg(), MRI)) + return true; + } + } + + // Check PHI nodes in successor blocks. These PHI's will be lowered to have + // a copy of the input value in this block, which is a definition of the + // value. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++ SI) { + MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end(); + I != E; ++I) { + // All PHI nodes are at the top of the block. + if (!I->isPHI()) break; + + if (isFPStackVReg(I->getOperand(0).getReg(), MRI)) + return true; + } + } + + return false; +} bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // If we are emitting FP stack code, scan the basic block to determine if this @@ -65,14 +122,13 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // Fast-path: If nothing is using the x87 registers, we don't need to do // any scanning. 
- MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() && MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() && MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty()) return false; bool Changed = false; - const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>(); MachineFunction::iterator MBBI = MF.begin(); MachineFunction::iterator EndMBB = MF.end(); for (; MBBI != EndMBB; ++MBBI) { @@ -87,48 +143,8 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { continue; } - bool ContainsFPCode = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - !ContainsFPCode && I != E; ++I) { - if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) { - const TargetRegisterClass *clas; - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (I->getOperand(op).isReg() && I->getOperand(op).isDef() && - TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && - ((clas = MRI.getRegClass(I->getOperand(op).getReg())) == - X86::RFP32RegisterClass || - clas == X86::RFP64RegisterClass || - clas == X86::RFP80RegisterClass)) { - ContainsFPCode = true; - break; - } - } - } - } - // Check PHI nodes in successor blocks. These PHI's will be lowered to have - // a copy of the input value in this block. In SSE mode, we only care about - // 80-bit values. - if (!ContainsFPCode) { - // Final check, check LLVM BB's that are successors to the LLVM BB - // corresponding to BB for FP PHI nodes. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const PHINode *PN; - for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB); - !ContainsFPCode && SI != E; ++SI) { - for (BasicBlock::const_iterator II = SI->begin(); - (PN = dyn_cast<PHINode>(II)); ++II) { - if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || - (!Subtarget.hasSSE2() && - PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { - ContainsFPCode = true; - break; - } - } - } - } - // Finally, if we found any FP code, emit the FP_REG_KILL instruction. - if (ContainsFPCode) { + // If we find any FP stack code, emit the FP_REG_KILL instruction. + if (ContainsFPStackCode(MBB, MRI)) { BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(), MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL)); ++NumFPKill; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index fd8bb1e..0f64383 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1693,7 +1693,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { Result, CurDAG->getTargetConstant(8, MVT::i8)), 0); // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); } else { Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1834,7 +1834,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(8, MVT::i8)), 0); // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); } else { Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1883,7 +1883,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Extract the l-register. 
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Reg); // Emit a testb. @@ -1912,7 +1912,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { Reg.getValueType(), Reg, RC), 0); // Extract the h-register. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, Reg); // Emit a testb. No special NOREX tricks are needed since there's @@ -1930,7 +1930,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Reg = N0.getNode()->getOperand(0); // Extract the 16-bit subregister. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, MVT::i16, Reg); // Emit a testw. @@ -1946,7 +1946,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Reg = N0.getNode()->getOperand(0); // Extract the 32-bit subregister. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, MVT::i32, Reg); // Emit a testl. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6ce9ab7..b02c33d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -94,7 +94,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // X86 is weird, it always uses i8 for shift amounts and setcc results. setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); - setSchedulingPreference(SchedulingForRegPressure); + setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); if (Subtarget->isTargetDarwin()) { @@ -145,13 +145,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); } else if (!UseSoftFloat) { - if (X86ScalarSSEf64) { - // We have an impenetrably clever algorithm for ui64->double only. - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - } + // We have an algorithm for SSE2->double, and we turn this into a + // 64-bit FILD followed by conditional FADD for other targets. + setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit // FILD for other targets. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have @@ -215,9 +214,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // TODO: when we have SSE, these could be more efficient, by using movd/movq. - if (!X86ScalarSSEf64) { + if (!X86ScalarSSEf64) { setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); + // Without SSE, i64->f64 goes through memory; i64->MMX is Legal. 
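What "goes through memory" means for these Expand'ed casts, as a small hedged sketch (standard C++, not the lowering code): with no SSE2 there is no direct GPR-to-FP register move, so the bitcast becomes a store of the integer followed by a reload as floating point:

#include <cstdint>
#include <cstring>

static double bitcastI64ToF64(uint64_t Bits) {
  double D;
  std::memcpy(&D, &Bits, sizeof D);  // models the stack-slot store/reload pair
  return D;
}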
+ if (Subtarget->hasMMX() && !DisableMMX) + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom); + else + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + } } // Scalar integer divide and remainder are lowered to use operations that @@ -679,6 +686,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); + + if (!X86ScalarSSEf64 && Subtarget->is64Bit()) { + setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); + } } if (!UseSoftFloat && Subtarget->hasSSE1()) { @@ -1244,10 +1259,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); - if (!Reg) { - Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64)); - FuncInfo->setSRetReturnReg(Reg); - } + assert(Reg && + "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); @@ -1384,6 +1397,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, return !Subtarget->is64Bit(); case CallingConv::X86_FastCall: return !Subtarget->is64Bit(); + case CallingConv::X86_ThisCall: + return !Subtarget->is64Bit(); case CallingConv::Fast: return GuaranteedTailCallOpt; case CallingConv::GHC: @@ -1405,6 +1420,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; + else if (CC == CallingConv::X86_ThisCall) + return CC_X86_32_ThisCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else if (CC == CallingConv::GHC) @@ -1596,7 +1613,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - if (Is64Bit || CallConv != CallingConv::X86_FastCall) { + if (Is64Bit || (CallConv != CallingConv::X86_FastCall && + CallConv != CallingConv::X86_ThisCall)) { FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true, false)); } @@ -1716,7 +1734,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!Is64Bit) { // RegSaveFrameIndex is X86-64 only. FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); - if (CallConv == CallingConv::X86_FastCall) + if (CallConv == CallingConv::X86_FastCall || + CallConv == CallingConv::X86_ThisCall) // fastcc functions can't have varargs. FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } @@ -5272,7 +5291,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 
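A compact model of the IsCalleePop() dispatch once X86_ThisCall is added; the enum names here are invented, and the stdcall case is assumed from the hunk's surrounding context. The 32-bit callee-cleanup conventions pop their own arguments only outside 64-bit mode, while fastcc follows the guaranteed tail call option:

enum CallConv { CC_C, CC_StdCall, CC_FastCall, CC_ThisCall, CC_Fast };

static bool isCalleePop(CallConv CC, bool Is64Bit, bool GuaranteedTCO) {
  switch (CC) {
  case CC_StdCall:
  case CC_FastCall:
  case CC_ThisCall:
    return !Is64Bit;        // callee cleans the stack on 32-bit x86 only
  case CC_Fast:
    return GuaranteedTCO;   // fastcc pops only under guaranteed tail calls
  default:
    return false;           // C and friends: caller cleans up
  }
}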
- MFI->setHasCalls(true); + MFI->setAdjustsStack(true); SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); @@ -5462,7 +5481,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, } SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, - SDValue StackSlot, + SDValue StackSlot, SelectionDAG &DAG) const { // Build the FILD DebugLoc dl = Op.getDebugLoc(); @@ -5636,35 +5655,72 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - // Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't + // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); EVT SrcVT = N0.getValueType(); - if (SrcVT == MVT::i64) { - // We only handle SSE2 f64 target here; caller can expand the rest. - if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) - return SDValue(); - + EVT DstVT = Op.getValueType(); + if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); - } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) { + else if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG); - } - - assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!"); // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); - SDValue WordOff = DAG.getConstant(4, getPointerTy()); - SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, - getPointerTy(), StackSlot, WordOff); - SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + if (SrcVT == MVT::i32) { + SDValue WordOff = DAG.getConstant(4, getPointerTy()); + SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, + getPointerTy(), StackSlot, WordOff); + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + StackSlot, NULL, 0, false, false, 0); + SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), + OffsetSlot, NULL, 0, false, false, 0); + SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); + return Fild; + } + + assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, NULL, 0, false, false, 0); - SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0, false, false, 0); - return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); + // For i64 source, we need to add the appropriate power of 2 if the input + // was negative. This is the same as the optimization in + // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here, + // we must be careful to do the computation in x87 extended precision, not + // in SSE. (The generic code can't know it's OK to do this, or how to.) + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); + SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; + SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3); + + APInt FF(32, 0x5F800000ULL); + + // Check whether the sign bit is set. + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), + Op.getOperand(0), DAG.getConstant(0, MVT::i64), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
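The fudge logic that follows deserves a worked restatement. FILD interprets the 64-bit integer as signed, so an input with the sign bit set comes out exactly 2^64 too small; 0x5F800000 is the IEEE-754 single-precision encoding of 2^64 (exponent field 191, i.e. 191 - 127 = 64, zero mantissa), and conditionally adding it back in 80-bit precision recovers the unsigned value before the final round. (The i32 path above needs no fixup because zero-extending into the 64-bit slot keeps the value non-negative.) A hedged arithmetic model, assuming an x87-style 80-bit long double and two's-complement wrap on the cast:

#include <cstdint>

static double u64ToDouble(uint64_t X) {
  long double Signed = static_cast<long double>(static_cast<int64_t>(X)); // FILD
  long double Fudge  = (X >> 63) ? 18446744073709551616.0L /* 2^64 */ : 0.0L;
  return static_cast<double>(Signed + Fudge);  // the trailing FP_ROUND
}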
+ SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), + getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset); + + // Load the value out, extending it from f32 to f80. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + FudgePtr, PseudoSourceValue::getConstantPool(), + 0, MVT::f32, false, false, 4); + // Extend everything to 80 bits to force it to be done on x87. + SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); + return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); } std::pair<SDValue,SDValue> X86TargetLowering:: @@ -6593,221 +6649,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops1, 2, dl); } -SDValue -X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const { - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - - // If not DWORD aligned or size is more than the threshold, call the library. - // The libc version is likely to be faster for these cases. It can use the - // address value and run time information about the CPU. - if ((Align & 3) != 0 || - !ConstantSize || - ConstantSize->getZExtValue() > - getSubtarget()->getMaxInlineSizeThreshold()) { - SDValue InFlag(0, 0); - - // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); - - if (const char *bzeroEntry = V && - V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { - EVT IntPtr = getPointerTy(); - const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - Entry.Node = Size; - Args.push_back(Entry); - std::pair<SDValue,SDValue> CallResult = - LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); - return CallResult.second; - } - - // Otherwise have the target-independent code call memset. - return SDValue(); - } - - uint64_t SizeVal = ConstantSize->getZExtValue(); - SDValue InFlag(0, 0); - EVT AVT; - SDValue Count; - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); - unsigned BytesLeft = 0; - bool TwoRepStos = false; - if (ValC) { - unsigned ValReg; - uint64_t Val = ValC->getZExtValue() & 255; - - // If the value is a constant, then we can potentially use larger sets. 
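Before the deleted switch below, it helps to see the widening trick it implemented, here as a hedged standalone model rather than the DAG code: replicate the fill byte across 2, 4, or 8 bytes so each rep stos iteration stores a whole word, then finish the size % width tail separately (the deleted code recursed into a smaller memset for that part):

#include <cstdint>
#include <cstring>

static void patternFill(uint8_t *Dst, uint64_t Size, uint8_t Byte) {
  uint64_t V = Byte;
  V |= V << 8;                         // 16-bit pattern (WORD-aligned case)
  V |= V << 16;                        // 32-bit pattern (DWORD-aligned case)
  V |= V << 32;                        // 64-bit pattern (QWORD-aligned case)
  uint64_t Words = Size / 8, Tail = Size % 8;
  for (uint64_t i = 0; i != Words; ++i)
    std::memcpy(Dst + 8 * i, &V, 8);   // models one rep stosq step
  for (uint64_t i = Size - Tail; i != Size; ++i)
    Dst[i] = Byte;                     // the 1-7 trailing bytes
}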
- switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - ValReg = X86::AX; - Val = (Val << 8) | Val; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - ValReg = X86::EAX; - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned - AVT = MVT::i64; - ValReg = X86::RAX; - Val = (Val << 32) | Val; - } - break; - default: // Byte aligned - AVT = MVT::i8; - ValReg = X86::AL; - Count = DAG.getIntPtrConstant(SizeVal); - break; - } - - if (AVT.bitsGT(MVT::i8)) { - unsigned UBytes = AVT.getSizeInBits() / 8; - Count = DAG.getIntPtrConstant(SizeVal / UBytes); - BytesLeft = SizeVal % UBytes; - } - - Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), - InFlag); - InFlag = Chain.getValue(1); - } else { - AVT = MVT::i8; - Count = DAG.getIntPtrConstant(SizeVal); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); - InFlag = Chain.getValue(1); - } - - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); - - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); - - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : - X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); - } else if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT AddrVT = Dst.getValueType(); - EVT SizeVT = Size.getValueType(); - - Chain = DAG.getMemset(Chain, dl, - DAG.getNode(ISD::ADD, dl, AddrVT, Dst, - DAG.getConstant(Offset, AddrVT)), - Src, - DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstSV, DstSVOff + Offset); - } - - // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. - return Chain; -} - -SDValue -X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { - // This requires the copy size to be a constant, preferrably - // within a subtarget-specific limit. - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) - return SDValue(); - - /// If not DWORD aligned, call the library. - if ((Align & 3) != 0) - return SDValue(); - - // DWORD aligned - EVT AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; - - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal); - unsigned BytesLeft = SizeVal % UBytes; - - SDValue InFlag(0, 0); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? 
X86::RCX : - X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : - X86::ESI, - Src, InFlag); - InFlag = Chain.getValue(1); - - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; - SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, - array_lengthof(Ops)); - - SmallVector<SDValue, 4> Results; - Results.push_back(RepMovs); - if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT DstVT = Dst.getValueType(); - EVT SrcVT = Src.getValueType(); - EVT SizeVT = Size.getValueType(); - Results.push_back(DAG.getMemcpy(Chain, dl, - DAG.getNode(ISD::ADD, dl, DstVT, Dst, - DAG.getConstant(Offset, DstVT)), - DAG.getNode(ISD::ADD, dl, SrcVT, Src, - DAG.getConstant(Offset, SrcVT)), - DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, AlwaysInline, - DstSV, DstSVOff + Offset, - SrcSV, SrcSVOff + Offset)); - } - - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Results[0], Results.size()); -} - SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); @@ -7138,6 +6979,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -7161,6 +7005,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -7298,6 +7143,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, break; } case CallingConv::X86_FastCall: + case CallingConv::X86_ThisCall: case CallingConv::Fast: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td @@ -7630,6 +7476,27 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } +SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op, + SelectionDAG &DAG) const { + EVT SrcVT = Op.getOperand(0).getValueType(); + EVT DstVT = Op.getValueType(); + assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && + Subtarget->hasMMX() && !DisableMMX) && + "Unexpected custom BIT_CONVERT"); + assert((DstVT == MVT::i64 || + (DstVT.isVector() && DstVT.getSizeInBits()==64)) && + "Unexpected custom BIT_CONVERT"); + // i64 <=> MMX conversions are Legal. + if (SrcVT==MVT::i64 && DstVT.isVector()) + return Op; + if (DstVT==MVT::i64 && SrcVT.isVector()) + return Op; + // MMX <=> MMX conversions are Legal. + if (SrcVT.isVector() && DstVT.isVector()) + return Op; + // All other conversions need to be expanded. 
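The dispatch in LowerBIT_CONVERT above reduces to a small decision table; this is a hedged restatement of it (the function itself returns Op unchanged for the legal cases and a null SDValue to request expansion):

// Assumes what the caller already checked: 64-bit target, MMX but no SSE2,
// and both types 64 bits wide, with any scalar side being i64.
static bool bitConvertStaysLegal(bool SrcIsI64, bool SrcIsVec,
                                 bool DstIsI64, bool DstIsVec) {
  if (SrcIsI64 && DstIsVec) return true;  // i64 -> MMX vector
  if (DstIsI64 && SrcIsVec) return true;  // MMX vector -> i64
  if (SrcIsVec && DstIsVec) return true;  // MMX vector -> MMX vector
  return false;                           // everything else gets expanded
}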
+ return SDValue(); +} SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); @@ -7699,6 +7566,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); + case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG); } } @@ -8203,9 +8071,15 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); MachineOperand* argOpers[2 + X86AddrNumOperands]; - for (int i=0; i < 2 + X86AddrNumOperands; ++i) + for (int i=0; i < 2 + X86AddrNumOperands; ++i) { argOpers[i] = &bInstr->getOperand(i+2); + // We use some of the operands multiple times, so conservatively just + // clear any kill flags that might be present. + if (argOpers[i]->isReg() && argOpers[i]->isUse()) + argOpers[i]->setIsKill(false); + } + // x86 address has 5 operands: base, index, scale, displacement, and segment. int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 440601f9..1ef1a7b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -563,7 +563,7 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - virtual const X86Subtarget* getSubtarget() const { + const X86Subtarget* getSubtarget() const { return Subtarget; } @@ -677,6 +677,7 @@ namespace llvm { SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; + SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; @@ -743,23 +744,6 @@ namespace llvm { void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, unsigned NewOp) const; - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; - /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. /// This takes the instruction to expand, the associated machine basic diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index f5c3dbf..97eb17c 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -18,7 +18,9 @@ // // 64-bits but only 32 bits are significant. -def i64i32imm : Operand<i64>; +def i64i32imm : Operand<i64> { + let ParserMatchClass = ImmSExti64i32AsmOperand; +} // 64-bits but only 32 bits are significant, and those bits are treated as being // pc relative. @@ -30,7 +32,7 @@ def i64i32imm_pcrel : Operand<i64> { // 64-bits but only 8 bits are significant. 
def i64i8imm : Operand<i64> { - let ParserMatchClass = ImmSExt8AsmOperand; + let ParserMatchClass = ImmSExti64i8AsmOperand; } // Special i64mem for addresses of load folding tail calls. These are not @@ -198,6 +200,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; + let mayLoad = 1 in def TCRETURNmi64 : I<0, Pseudo, (outs), (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -208,6 +211,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; + let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; } @@ -241,6 +245,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; +let mayLoad = 1 in def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; @@ -267,14 +272,16 @@ def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), "push{q}\t$imm", []>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{q}\t$imm", []>; -def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), +def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), "push{q}\t$imm", []>; } -let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in -def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf{q}", []>, REX_W; -let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in -def PUSHFQ64 : I<0x9C, RawFrm, (outs), (ins), "pushf{q}", []>; +let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in +def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>, + Requires<[In64BitMode]>; +let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in +def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>, + Requires<[In64BitMode]>; def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), @@ -309,16 +316,22 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), } // Defs = [EFLAGS] // Repeat string ops -let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in +let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", [(X86rep_movs i64)]>, REP; -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)]>, REP; -def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>; +let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in +def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; + +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in +def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; -def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>; +def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; + +def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; // Fast system-call instructions def SYSEXIT64 : RI<0x35, RawFrm, @@ -341,8 +354,17 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), [(set GR64:$dst, i64immSExt32:$src)]>; } +// The assembler accepts movq of a 64-bit immediate as an alternate spelling of +// movabsq. 
+let isAsmParserOnly = 1 in { +def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; +} + +let isCodeGenOnly = 1 in { def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; +} let canFoldAsLoad = 1, isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), @@ -398,9 +420,9 @@ def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; // Moves to and from control registers -def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG_64:$src), +def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_64:$dst), (ins GR64:$src), +def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; // Sign/Zero extenders @@ -478,7 +500,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{ // In the case of a 32-bit def that is known to implicitly zero-extend, // we can use a SUBREG_TO_REG. def : Pat<(i64 (zext def32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; + (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; let neverHasSideEffects = 1 in { let Defs = [RAX], Uses = [EAX] in @@ -496,7 +518,7 @@ let neverHasSideEffects = 1 in { let Defs = [EFLAGS] in { -def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i32imm:$src), +def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src), "add{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -555,7 +577,7 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2), let Uses = [EFLAGS] in { -def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i32imm:$src), +def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src), "adc{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -565,9 +587,11 @@ def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), (ins GR64:$src1, GR64:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", []>; +} def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), @@ -605,9 +629,11 @@ def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86sub_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", []>; +} // Register-Memory Subtraction def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), @@ -629,7 +655,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress -def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src), +def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src), "sub{q}\t{$src, %rax|%rax, $src}", []>; // Memory-Register Subtraction @@ -657,9 +683,11 @@ def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", []>; +} def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), @@ -676,7 +704,7 @@ def SBB64ri32 : 
RIi32<0x81, MRM3r, (outs GR64:$dst), [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress -def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i32imm:$src), +def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src), "sbb{q}\t{$src, %rax|%rax, $src}", []>; def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -1076,7 +1104,7 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)]>; let Defs = [EFLAGS] in { -def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i32imm:$src), +def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src), "and{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -1086,9 +1114,11 @@ def AND64rr : RI<0x21, MRMDestReg, "and{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86and_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", []>; +} def AND64rm : RI<0x23, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", @@ -1129,9 +1159,11 @@ def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86or_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", []>; +} def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", @@ -1162,7 +1194,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src), +def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src), "or{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -1172,9 +1204,11 @@ def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86xor_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", []>; +} def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", @@ -1205,7 +1239,7 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src), +def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src), "xor{q}\t{$src, %rax|%rax, $src}", []>; } // Defs = [EFLAGS] @@ -1216,7 +1250,7 @@ def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src), // Integer comparison let Defs = [EFLAGS] in { -def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src), +def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src), "test{q}\t{$src, %rax|%rax, $src}", []>; let isCommutable = 1 in def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), @@ -1238,7 +1272,7 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), i64immSExt32:$src2), 0))]>; -def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src), +def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src), "cmp{q}\t{$src, %rax|%rax, $src}", []>; def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, 
GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", @@ -1293,7 +1327,8 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB, + REX_W; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. @@ -1714,11 +1749,13 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; @@ -1730,7 +1767,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), "xchg{q}\t{$src, %rax|%rax, $src}", []>; // Optimized codegen when the non-memory output is not used. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" @@ -1982,14 +2019,14 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; // defined after an extload. def : Pat<(extloadi64i32 addr:$src), (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), - x86_subreg_32bit)>; + sub_32bit)>; // anyext. Define these to do an explicit zero-extend to // avoid partial-register updates. 
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; def : Pat<(i64 (anyext GR32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; + (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; //===----------------------------------------------------------------------===// // Some peepholes @@ -2016,54 +2053,54 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (SUBREG_TO_REG (i64 0), (AND32ri - (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), + (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), - x86_subreg_32bit)>; + sub_32bit)>; // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), - (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; + (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), - (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>; + (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; // r & (2^8-1) ==> movz def : Pat<(and GR64:$src, 0xff), - (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>; + (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>, + (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>, Requires<[In64BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>, + (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>, Requires<[In64BitMode]>; // sext_inreg patterns def : Pat<(sext_inreg GR64:$src, i32), - (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; + (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; def : Pat<(sext_inreg GR64:$src, i16), - (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; + (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; def : Pat<(sext_inreg GR64:$src, i8), - (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>; + (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; def : Pat<(sext_inreg GR32:$src, i8), - (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>, + (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>, Requires<[In64BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>, + (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>, Requires<[In64BitMode]>; // trunc patterns def : Pat<(i32 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>; + (EXTRACT_SUBREG GR64:$src, sub_32bit)>; def : Pat<(i16 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>; + (EXTRACT_SUBREG GR64:$src, sub_16bit)>; def : Pat<(i8 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>; + (EXTRACT_SUBREG GR64:$src, sub_8bit)>; def : Pat<(i8 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>, + (EXTRACT_SUBREG GR32:$src, sub_8bit)>, Requires<[In64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), - (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>, + (EXTRACT_SUBREG GR16:$src, sub_8bit)>, Requires<[In64BitMode]>; // h-register tricks. 
@@ -2079,67 +2116,67 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_16bit)>, + sub_8bit_hi)), + sub_16bit)>, Requires<[In64BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; // h-register extract and store. 
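The h-register patterns around this point all exploit one fact, stated here as a trivial hedged C++ model: for a value held in one of the "ABCD" registers, bits 8 through 15 are directly addressable as the high-byte subregister (sub_8bit_hi, i.e. %ah/%bh/%ch/%dh), so a shift-right-by-8-and-mask needs no shift instruction at all:

#include <cstdint>

static uint8_t highByte(uint32_t X) {
  return static_cast<uint8_t>(X >> 8);   // == (X >> 8) & 0xff, i.e. %?h
}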
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - x86_subreg_8bit_hi))>; + sub_8bit_hi))>; def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a21bfb9..34e12ca 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -744,17 +744,17 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, case X86::MOVZX32rr8: case X86::MOVSX64rr8: case X86::MOVZX64rr8: - SubIdx = 1; + SubIdx = X86::sub_8bit; break; case X86::MOVSX32rr16: case X86::MOVZX32rr16: case X86::MOVSX64rr16: case X86::MOVZX64rr16: - SubIdx = 3; + SubIdx = X86::sub_16bit; break; case X86::MOVSX64rr32: case X86::MOVZX64rr32: - SubIdx = 4; + SubIdx = X86::sub_32bit; break; } return true; @@ -1065,7 +1065,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo *TRI) const { - DebugLoc DL = MBB.findDebugLoc(I); + DebugLoc DL = Orig->getDebugLoc(); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = TRI->getSubReg(DestReg, SubIdx); @@ -1154,7 +1154,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) .addReg(leaInReg) .addReg(Src, getKillRegState(isKill)) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); @@ -1198,7 +1198,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) .addReg(leaInReg2) .addReg(Src2, getKillRegState(isKill2)) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); addRegReg(MIB, leaInReg, true, leaInReg2, true); } if (LV && isKill2 && InsMI2) @@ -1212,7 +1212,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(leaOutReg, RegState::Kill) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); if (LV) { // Update live variables @@ -1901,8 +1901,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL = MBB.findDebugLoc(MI); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // Determine if DstRC and SrcRC have a common superclass in common. 
const TargetRegisterClass *CommonRC = DestRC; @@ -1993,12 +1993,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (SrcReg != X86::EFLAGS) return false; if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSHFQ64)); + BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return true; } else if (DestRC == &X86::GR32RegClass || DestRC == &X86::GR32_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSHFD)); + BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return true; } @@ -2007,12 +2007,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, return false; if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); - BuildMI(MBB, MI, DL, get(X86::POPFQ)); + BuildMI(MBB, MI, DL, get(X86::POPF64)); return true; } else if (SrcRC == &X86::GR32RegClass || DestRC == &X86::GR32_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); - BuildMI(MBB, MI, DL, get(X86::POPFD)); + BuildMI(MBB, MI, DL, get(X86::POPF32)); return true; } } @@ -2133,7 +2133,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); @@ -2230,7 +2231,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg, void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const{ + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); @@ -2256,7 +2258,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -2284,7 +2287,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, CalleeFrameSize += SlotSize; BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass, + &RI); } } @@ -2294,7 +2298,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -2314,7 +2319,7 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (RegClass != &X86::VR128RegClass && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); + loadRegFromStackSlot(MBB, MI, Reg, 
CSI[i].getFrameIdx(), RegClass, &RI); } } return true; @@ -2478,9 +2483,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned DstReg = NewMI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, - 4/*x86_subreg_32bit*/)); + X86::sub_32bit)); else - NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/); + NewMI->getOperand(0).setSubReg(X86::sub_32bit); } return NewMI; } @@ -2526,9 +2531,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (MI->getOpcode()) { default: return NULL; case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break; } // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -2595,9 +2600,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (MI->getOpcode()) { default: return NULL; case X86::TEST8rr: NewOpc = X86::CMP8ri; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri8; break; } // Change to CMPXXri r, 0 first. MI->setDesc(get(NewOpc)); @@ -2805,16 +2810,22 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, switch (DataMI->getOpcode()) { default: break; case X86::CMP64ri32: + case X86::CMP64ri8: case X86::CMP32ri: + case X86::CMP32ri8: case X86::CMP16ri: + case X86::CMP16ri8: case X86::CMP8ri: { MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); if (MO1.getImm() == 0) { switch (DataMI->getOpcode()) { default: break; + case X86::CMP64ri8: case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; + case X86::CMP32ri8: case X86::CMP32ri: NewOpc = X86::TEST32rr; break; + case X86::CMP16ri8: case X86::CMP16ri: NewOpc = X86::TEST16rr; break; case X86::CMP8ri: NewOpc = X86::TEST8rr; break; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index df99c7f..62d7c74 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -590,11 +590,13 @@ public: MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, SmallVectorImpl<MachineOperand> &Addr, @@ -606,7 +608,8 @@ public: virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromAddr(MachineFunction &MF, 
unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, @@ -617,11 +620,13 @@ public: virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index a2754ea..0d59c42 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -195,15 +195,15 @@ def ptr_rc_nosp : PointerLikeRegClass<1>; // def X86MemAsmOperand : AsmOperandClass { let Name = "Mem"; - let SuperClass = ?; -} -def X86AbsMemAsmOperand : AsmOperandClass { - let Name = "AbsMem"; - let SuperClass = X86MemAsmOperand; + let SuperClasses = []; } def X86NoSegMemAsmOperand : AsmOperandClass { let Name = "NoSegMem"; - let SuperClass = X86MemAsmOperand; + let SuperClasses = [X86MemAsmOperand]; +} +def X86AbsMemAsmOperand : AsmOperandClass { + let Name = "AbsMem"; + let SuperClasses = [X86NoSegMemAsmOperand]; } class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; @@ -270,19 +270,49 @@ def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; } -def ImmSExt8AsmOperand : AsmOperandClass { - let Name = "ImmSExt8"; - let SuperClass = ImmAsmOperand; +class ImmSExtAsmOperandClass : AsmOperandClass { + let SuperClasses = [ImmAsmOperand]; + let RenderMethod = "addImmOperands"; +} + +// Sign-extended immediate classes. We don't need to define the full lattice +// here because there is no instruction with an ambiguity between ImmSExti64i32 +// and ImmSExti32i8. +// +// The strange ranges come from the fact that the assembler always works with +// 64-bit immediates, but for a 16-bit target value we want to accept both "-1" +// (which will be a -1ULL), and "0xFF" (-1 in 16-bits). + +// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF] +def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti64i32"; +} + +// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti16i8"; + let SuperClasses = [ImmSExti64i32AsmOperand]; +} + +// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti32i8"; +} + +// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti64i8"; + let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand]; } // A couple of more descriptive operand definitions. // 16-bits but only 8 bits are significant. def i16i8imm : Operand<i16> { - let ParserMatchClass = ImmSExt8AsmOperand; + let ParserMatchClass = ImmSExti16i8AsmOperand; } // 32-bits but only 8 bits are significant. 
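The ranges documented above can be written out as predicates; here is the 32-bit one as a hedged sketch (the real matching lives in the assembler's generated operand classes, not in a helper like this). It accepts the small positive spellings up to "0x7F", the 32-bit wrapped negatives such as "0xFFFFFF80", and the full 64-bit sign-extended negatives such as "-1":

#include <cstdint>

static bool isImmSExti32i8(uint64_t V) {
  return V <= 0x7FULL ||                                   // [0, 0x7F]
         (V >= 0xFFFFFF80ULL && V <= 0xFFFFFFFFULL) ||     // 32-bit negatives
         V >= 0xFFFFFFFFFFFFFF80ULL;                       // 64-bit negatives
}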
def i32i8imm : Operand<i32> { - let ParserMatchClass = ImmSExt8AsmOperand; + let ParserMatchClass = ImmSExti32i8AsmOperand; } //===----------------------------------------------------------------------===// @@ -542,8 +572,10 @@ let neverHasSideEffects = 1 in { } // Trap -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>; -def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; +def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>; +// FIXME: need to make sure that "int $3" matches int3 +def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; @@ -693,6 +725,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TCRETURNri : I<0, Pseudo, (outs), (ins GR32_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; + let mayLoad = 1 in def TCRETURNmi : I<0, Pseudo, (outs), (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -706,8 +739,16 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; + let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; + + // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL + // marker on instructions, while still being able to relax. + let isCodeGenOnly = 1 in { + def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst # TAILCALL", []>; + } } //===----------------------------------------------------------------------===// @@ -719,10 +760,12 @@ def LEAVE : I<0xC9, RawFrm, def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; +let mayLoad = 1 in def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; +let mayLoad = 1 in def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; @@ -762,12 +805,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), } let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in { -def POPF : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; -def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf{l}", []>; +def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; +def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, + Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in { -def PUSHF : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; -def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf{l}", []>; +def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; +def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, + Requires<[In32BitMode]>; } let isTwoAddress = 1 in // GR32 = bswap GR32 @@ -867,7 +912,7 @@ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, let Defs = [RAX, RCX, RDX] in def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; -let isBarrier = 1, hasCtrlDep = 1 in { +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { def TRAP : 
I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -966,36 +1011,47 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; -def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src), +/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a +/// 32-bit absolute offset from the segment base. These are only valid in +/// x86-32 mode. def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|%al, $src}", []>; -def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src), +def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), "mov{l}\t{$src, %eax|%eax, $src}", []>; -def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins), +def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{b}\t{%al, $dst|$dst, %al}", []>; -def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins), +def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>; - + // Moves to and from segment registers def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; +let isCodeGenOnly = 1 in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", []>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +} let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), @@ -1059,10 +1115,10 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; // Moves to and from control registers -def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG_32:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV32cr : I<0x22, MRMSrcReg, (outs 
CONTROL_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions... @@ -1746,6 +1802,7 @@ def AND32rr : I<0x21, MRMDestReg, // AND instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. +let isCodeGenOnly = 1 in { def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "and{b}\t{$src2, $dst|$dst, $src2}", []>; def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), @@ -1754,6 +1811,7 @@ def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "and{l}\t{$src2, $dst|$dst, $src2}", []>; +} def AND8rm : I<0x22, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), @@ -1872,6 +1930,7 @@ def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), // OR instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. +let isCodeGenOnly = 1 in { def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "or{b}\t{$src2, $dst|$dst, $src2}", []>; def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), @@ -1880,6 +1939,7 @@ def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", []>; +} def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), @@ -1988,6 +2048,7 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y // XOR instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. 
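// (A worked example, as an illustrative aside: reg-reg ALU instructions have
// two legal encodings, so the bytes 31 d8 and 33 c3 both decode to
// "xor %ebx, %eax". The plain defs model the 0x31 MRMDestReg encoding; the
// _REV defs below give the disassembler a distinct decoding for the 0x33
// MRMSrcReg encoding, and the new isCodeGenOnly flag keeps instruction
// selection and the assembler from ever choosing the _REV forms.)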
+let isCodeGenOnly = 1 in { def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "xor{b}\t{$src2, $dst|$dst, $src2}", []>; def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), @@ -1996,6 +2057,7 @@ def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "xor{l}\t{$src2, $dst|$dst, $src2}", []>; +} def XOR8rm : I<0x32, MRMSrcMem, (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), @@ -2793,6 +2855,7 @@ def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst), [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>; } +let isCodeGenOnly = 1 in { def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "adc{b}\t{$src2, $dst|$dst, $src2}", []>; def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), @@ -2801,6 +2864,7 @@ def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "adc{l}\t{$src2, $dst|$dst, $src2}", []>; +} def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), @@ -2888,6 +2952,7 @@ def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), [(set GR32:$dst, EFLAGS, (X86sub_flag GR32:$src1, GR32:$src2))]>; +let isCodeGenOnly = 1 in { def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", []>; def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), @@ -2896,6 +2961,7 @@ def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", []>; +} // Register-Memory Subtraction def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), @@ -3039,6 +3105,7 @@ let isTwoAddress = 0 in { "sbb{l}\t{$src, %eax|%eax, $src}", []>; } +let isCodeGenOnly = 1 in { def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", []>; def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), @@ -3047,6 +3114,7 @@ def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "sbb{l}\t{$src2, $dst|$dst, $src2}", []>; +} def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", @@ -3864,12 +3932,14 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +} def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; @@ -3878,12 +3948,14 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins 
i16mem:$dst, GR16:$src), "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +} let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), @@ -3891,7 +3963,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), // Optimized codegen when the non-memory output is not used. // FIXME: Use normal add / sub instructions and add lock prefix dynamically. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "lock\n\t" "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; @@ -4453,7 +4525,7 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; // Except for i16 -> i32 since isel expects i16 ops to be promoted to i32. def : Pat<(i32 (anyext GR16:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; //===----------------------------------------------------------------------===// @@ -4473,81 +4545,81 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), - (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>; + (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // sext_inreg patterns def : Pat<(sext_inreg GR32:$src, i16), - (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; + (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>; + (EXTRACT_SUBREG GR32:$src, sub_16bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit)>, + sub_8bit)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit)>, + sub_8bit)>, Requires<[In32BitMode]>; // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)>, + sub_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi)>, + sub_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_16bit)>, Requires<[In32BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - 
x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 744af50..0952fc8 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -117,9 +117,6 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs), - (ins i64mem:$dst, VR64:$src), - "movq\t{$src, $dst|$dst, $src}", []>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -133,10 +130,10 @@ let neverHasSideEffects = 1 in def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), - "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, - (v1i64 (scalar_to_vector GR64:$src)))]>; +def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, + (v1i64 (scalar_to_vector GR64:$src)))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2129580..5580ba7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -117,17 +117,17 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; -def alignedloadfsf32 : PatFrag<(ops node:$ptr), +def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; -def alignedloadfsf64 : PatFrag<(ops node:$ptr), +def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>; -def alignedloadv4f32 : PatFrag<(ops node:$ptr), +def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; -def alignedloadv2f64 : PatFrag<(ops node:$ptr), +def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>; -def alignedloadv4i32 : PatFrag<(ops node:$ptr), +def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>; -def alignedloadv2i64 : PatFrag<(ops node:$ptr), +def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>; // Like 'load', but uses special alignment checks suitable for use in @@ -387,11 +387,11 @@ def MOVSSrr : SSI<0x10, MRMSrcReg, let AddedComplexity = 15 in def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; // Implicitly promote a 32-bit scalar to a vector. 
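// (Why this is free, as a sketch: an FR32 value already lives in the low lane
// of an XMM register, so sub_ss lets the pattern re-tag the same physical
// register as a v4f32 with undef upper lanes; the INSERT_SUBREG of an
// IMPLICIT_DEF is expected to be coalesced away rather than emit a copy.)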
def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>; + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; // Loading from memory automatically zeroing upper bits. let canFoldAsLoad = 1, isReMaterializable = 1 in @@ -403,11 +403,11 @@ def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), // with SUBREG_TO_REG. let AddedComplexity = 20 in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; } // Store scalar value to memory. @@ -419,7 +419,7 @@ def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; // Conversion instructions def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), @@ -449,7 +449,7 @@ def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), [(set GR32:$dst, (int_x86_sse_cvtss2si (load addr:$src)))]>; -// Match intrinisics which expect MM and XMM operand(s). +// Match intrinsics which expect MM and XMM operand(s). def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvtps2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>; @@ -509,6 +509,17 @@ let mayLoad = 1 in def CMPSSrm : SSIi8<0xC2, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg, + (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), + "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +let mayLoad = 1 in + def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem, + (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), + "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } let Defs = [EFLAGS] in { @@ -518,25 +529,25 @@ def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; - + def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; - + } // Defs = [EFLAGS] // Aliases to match intrinsics which expect XMM operand(s). 
let Constraints = "$src1 = $dst" in { def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss + [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, VR128:$src, imm:$cc))]>; def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, @@ -1009,6 +1020,16 @@ let Constraints = "$src1 = $dst" in { "cmp${cc}ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, (memop addr:$src), imm:$cc))]>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), + "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>; + def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), + "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; @@ -1102,7 +1123,8 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), } // Load, store, and memory fence -def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>; +def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, + TB, Requires<[HasSSE1]>; // MXCSR register def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), @@ -1130,7 +1152,7 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; //===---------------------------------------------------------------------===// // SSE2 Instructions @@ -1152,11 +1174,11 @@ def MOVSDrr : SDI<0x10, MRMSrcReg, let AddedComplexity = 15 in def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>; + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; // Loading from memory automatically zeroing upper bits. let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in @@ -1168,15 +1190,15 @@ def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), // with SUBREG_TO_REG. 
let AddedComplexity = 20 in { def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; } // Store scalar value to memory. @@ -1188,7 +1210,7 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; // Conversion instructions def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), @@ -1255,7 +1277,7 @@ def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), [(set GR32:$dst, (int_x86_sse2_cvtsd2si (load addr:$src)))]>; -// Match intrinisics which expect MM and XMM operand(s). +// Match intrinsics which expect MM and XMM operand(s). def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; @@ -1297,6 +1319,17 @@ let mayLoad = 1 in def CMPSDrm : SDIi8<0xC2, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg, + (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), + "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +let mayLoad = 1 in + def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem, + (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), + "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } let Defs = [EFLAGS] in { @@ -1311,13 +1344,13 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), // Aliases to match intrinsics which expect XMM operand(s). 
let Constraints = "$src1 = $dst" in { def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, VR128:$src, imm:$cc))]>; def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, @@ -1655,7 +1688,7 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, + [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>, XS, Requires<[HasSSE2]>; def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -1890,6 +1923,16 @@ let Constraints = "$src1 = $dst" in { "cmp${cc}pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, (memop addr:$src), imm:$cc))]>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), + "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; + def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), + "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; @@ -1980,24 +2023,24 @@ let Constraints = "$src1 = $dst" in { multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> { let isCommutable = Commutable; } - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv2i64 + (bitconvert (memopv2i64 addr:$src2))))]>; } multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; @@ -2006,7 +2049,7 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>; - def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), + def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; @@ -2015,13 +2058,13 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, /// PDI_binop_rm - Simple SSE2 binary operator. 
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> { let isCommutable = Commutable; } - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, @@ -2416,6 +2459,10 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; +// Pause. This "instruction" is encoded as "rep; nop", so even though it +// was introduced with SSE2, it's backward compatible. +def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; + //TODO: custom lower this so as to never even generate the noop def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 0)), (NOOP)>; @@ -2462,7 +2509,7 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), (iPTR 0))), addr:$dst)]>; def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", @@ -2903,7 +2950,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw, int_x86_ssse3_pmul_hr_sw_128, 1>; - + defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", int_x86_ssse3_pshuf_b, int_x86_ssse3_pshuf_b_128>; @@ -3042,10 +3089,10 @@ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), (MOVSSrr (v4f32 (V_SET0PS)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>; + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), (MOVSSrr (v4i32 (V_SET0PI)), - (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; } // Splat v2f64 / v2i64 @@ -3181,17 +3228,17 @@ let AddedComplexity = 15 in { // Setting the lowest element in the vector. 
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, Requires<[HasSSE2]>; def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, Requires<[HasSSE2]>; } @@ -3464,14 +3511,14 @@ let Constraints = "$src1 = $dst" in { let Constraints = "$src1 = $dst" in { multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, OpSize { let isCommutable = Commutable; } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpNode VR128:$src1, @@ -3949,15 +3996,15 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "#PCMPESTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, OpSize; } @@ -3972,7 +4019,7 @@ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), let Defs = [ECX, EFLAGS] in { multiclass SS42AI_pcmpistri<Intrinsic IntId128> { - def rr : SS42AI<0x63, MRMSrcReg, (outs), + def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), @@ -4003,7 +4050,7 @@ let Uses = [EAX, EDX] in { def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, + [(set ECX, (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), (implicit EFLAGS)]>, OpSize; } @@ -4081,16 +4128,15 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), OpSize; def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), + (ins VR128:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, OpSize; def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src1, i32i8imm:$src2), + (ins i128mem:$src1, i8imm:$src2), 
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, OpSize; - diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index a3e04b0..98975ea 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -37,7 +37,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -145,6 +144,19 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::XMM7: case X86::XMM15: case X86::MM7: return 7; + case X86::ES: + return 0; + case X86::CS: + return 1; + case X86::SS: + return 2; + case X86::DS: + return 3; + case X86::FS: + return 4; + case X86::GS: + return 5; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -158,8 +170,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, unsigned SubIdx) const { switch (SubIdx) { default: return 0; - case 1: - // 8-bit + case X86::sub_8bit: if (B == &X86::GR8RegClass) { if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8) return A; @@ -191,12 +202,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR16_NOREXRegClass; else if (A == &X86::GR16_ABCDRegClass) return &X86::GR16_ABCDRegClass; - } else if (B == &X86::FR32RegClass) { - return A; } break; - case 2: - // 8-bit hi + case X86::sub_8bit_hi: if (B == &X86::GR8_ABCD_HRegClass) { if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || A == &X86::GR64_NOREXRegClass || @@ -209,12 +217,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || A == &X86::GR16_NOREXRegClass) return &X86::GR16_ABCDRegClass; - } else if (B == &X86::FR64RegClass) { - return A; } break; - case 3: - // 16-bit + case X86::sub_16bit: if (B == &X86::GR16RegClass) { if (A->getSize() == 4 || A->getSize() == 8) return A; @@ -238,12 +243,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR32_NOREXRegClass; else if (A == &X86::GR32_ABCDRegClass) return &X86::GR64_ABCDRegClass; - } else if (B == &X86::VR128RegClass) { - return A; } break; - case 4: - // 32-bit + case X86::sub_32bit: if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { if (A->getSize() == 8) return A; @@ -261,6 +263,18 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR64_ABCDRegClass; } break; + case X86::sub_ss: + if (B == &X86::FR32RegClass) + return A; + break; + case X86::sub_sd: + if (B == &X86::FR64RegClass) + return A; + break; + case X86::sub_xmm: + if (B == &X86::VR128RegClass) + return A; + break; } return 0; } @@ -518,6 +532,30 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { return Offset; } +static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::SUB64ri8; + return X86::SUB64ri32; + } else { + if (isInt<8>(Imm)) + return X86::SUB32ri8; + return X86::SUB32ri; + } +} + +static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::ADD64ri8; + return X86::ADD64ri32; + } else { + if (isInt<8>(Imm)) + return X86::ADD32ri8; + return 
X86::ADD32ri; + } +} + void X86RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -537,7 +575,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineInstr *New = 0; if (Old->getOpcode() == getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), - TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), + TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr) .addReg(StackPtr) .addImm(Amount); @@ -549,9 +587,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount -= CalleeAmt; if (Amount) { - unsigned Opc = (Amount < 128) ? - (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : - (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); + unsigned Opc = getADDriOpcode(Is64Bit, Amount); New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(Amount); @@ -571,9 +607,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { - unsigned Opc = (CalleeAmt < 128) ? - (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : - (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); + unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), @@ -691,13 +725,9 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const TargetInstrInfo &TII) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; - unsigned Opc = isSub - ? ((Offset < 128) ? - (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : - (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri)) - : ((Offset < 128) ? - (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : - (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri)); + unsigned Opc = isSub ? + getSUBriOpcode(Is64Bit, Offset) : + getADDriOpcode(Is64Bit, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -899,7 +929,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls() && // No calls. + !MFI->adjustsStack() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; @@ -917,7 +947,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // size is bigger than the callers. if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), + BuildMI(MBB, MBBI, DL, + TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta); @@ -1307,7 +1338,7 @@ X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { // Calculate amount of bytes used for return address storing int stackGrowth = (Is64Bit ? -8 : -4); - // Initial state of the frame pointer is esp+4. + // Initial state of the frame pointer is esp+stackGrowth. 
MachineLocation Dst(MachineLocation::VirtualFP); MachineLocation Src(StackPtr, stackGrowth); Moves.push_back(MachineMove(0, Dst, Src)); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index ac96c4c..d0b82e2 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -30,16 +30,6 @@ namespace N86 { }; } -namespace X86 { - /// SubregIndex - The index of various sized subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// X86RegisterInfo.td file. - enum SubregIndex { - SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4, - SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3 - }; -} - /// DWARFFlavour - Flavour of dwarf regnumbers /// namespace DWARFFlavour { diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 49a6ca0..91cfaa9 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -18,6 +18,17 @@ // let Namespace = "X86" in { + // Subregister indices. + def sub_8bit : SubRegIndex; + def sub_8bit_hi : SubRegIndex; + def sub_16bit : SubRegIndex; + def sub_32bit : SubRegIndex; + + def sub_ss : SubRegIndex; + def sub_sd : SubRegIndex; + def sub_xmm : SubRegIndex; + + // In the register alias definitions below, we define which registers alias // which others. We only specify which registers the small registers alias, // because the register file generator is smart enough to figure out that @@ -57,17 +68,22 @@ let Namespace = "X86" in { def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>; // 16-bit registers + let SubRegIndices = [sub_8bit, sub_8bit_hi] in { def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>; def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>; def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>; def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>; + } + let SubRegIndices = [sub_8bit] in { def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>; def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>; def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>; def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>; + } def IP : Register<"ip">, DwarfRegNum<[16]>; // X86-64 only + let SubRegIndices = [sub_8bit] in { def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>; def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>; def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>; @@ -76,8 +92,9 @@ let Namespace = "X86" in { def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>; def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>; def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>; - + } // 32-bit registers + let SubRegIndices = [sub_16bit] in { def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>; def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>; def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>; @@ -97,8 +114,10 @@ let Namespace = "X86" in { def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>; def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>; def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>; + } // 64-bit registers, X86-64 only + let SubRegIndices = [sub_32bit] in { def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>; def RDX : 
RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>; def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>; @@ -117,6 +136,7 @@ let Namespace = "X86" in { def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>; def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>; def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>; + } // MMX Registers. These are actually aliased to ST0 .. ST7 def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>; @@ -137,7 +157,9 @@ let Namespace = "X86" in { def FP5 : Register<"fp5">; def FP6 : Register<"fp6">; - // XMM Registers, used by the various SSE instruction set extensions + // XMM Registers, used by the various SSE instruction set extensions. + // The sub_ss and sub_sd subregs are the same registers with another regclass. + let CompositeIndices = [(sub_ss), (sub_sd)] in { def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>; def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>; def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>; @@ -156,8 +178,10 @@ let Namespace = "X86" in { def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; + } // YMM Registers, used by AVX instructions + let SubRegIndices = [sub_xmm] in { def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>; def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>; def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>; @@ -174,6 +198,7 @@ let Namespace = "X86" in { def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>; def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>; def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>; + } // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; @@ -207,106 +232,19 @@ let Namespace = "X86" in { def DR7 : Register<"dr7">; // Condition registers - def ECR0 : Register<"ecr0">; - def ECR1 : Register<"ecr1">; - def ECR2 : Register<"ecr2">; - def ECR3 : Register<"ecr3">; - def ECR4 : Register<"ecr4">; - def ECR5 : Register<"ecr5">; - def ECR6 : Register<"ecr6">; - def ECR7 : Register<"ecr7">; - - def RCR0 : Register<"rcr0">; - def RCR1 : Register<"rcr1">; - def RCR2 : Register<"rcr2">; - def RCR3 : Register<"rcr3">; - def RCR4 : Register<"rcr4">; - def RCR5 : Register<"rcr5">; - def RCR6 : Register<"rcr6">; - def RCR7 : Register<"rcr7">; - def RCR8 : Register<"rcr8">; + def CR0 : Register<"cr0">; + def CR1 : Register<"cr1">; + def CR2 : Register<"cr2">; + def CR3 : Register<"cr3">; + def CR4 : Register<"cr4">; + def CR5 : Register<"cr5">; + def CR6 : Register<"cr6">; + def CR7 : Register<"cr7">; + def CR8 : Register<"cr8">; } //===----------------------------------------------------------------------===// -// Subregister Set Definitions... now that we have all of the pieces, define the -// sub registers for each register. 
-// - -def x86_subreg_8bit : PatLeaf<(i32 1)>; -def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; -def x86_subreg_16bit : PatLeaf<(i32 3)>; -def x86_subreg_32bit : PatLeaf<(i32 4)>; - -def x86_subreg_ss : PatLeaf<(i32 1)>; -def x86_subreg_sd : PatLeaf<(i32 2)>; -def x86_subreg_xmm : PatLeaf<(i32 3)>; - -def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [AX, CX, DX, BX], - [AH, CH, DH, BH]>; - -def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [EAX, ECX, EDX, EBX], - [AH, CH, DH, BH]>; - -def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], - [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [RAX, RCX, RDX, RBX], - [AH, CH, DH, BH]>; - -def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; - -def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -//===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is // implicitly defined to be the register allocation order. 
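Taken together, these register-file changes retire the old positional subregister numbering, where an index like 1 meant sub_8bit for the integer registers but sub_ss for the XMM registers, in favor of the named SubRegIndex definitions introduced above. A minimal C++ sketch of the symbolic queries this enables (hypothetical helper; the equivalent switch over X86::sub_* indices appears in the X86RegisterInfo.cpp hunks above):

#include "X86RegisterInfo.h" // in-tree header; pulls in the generated X86::sub_* enums
using namespace llvm;

// Named indices are self-documenting and cannot collide across register
// classes the way the old overlapping integer indices could.
static unsigned getLow8BitAlias(const TargetRegisterInfo &TRI, unsigned Reg) {
  // e.g. X86::EAX -> X86::AL; returns 0 if Reg has no such subregister.
  return TRI.getSubReg(Reg, X86::sub_8bit);
}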
@@ -370,7 +308,7 @@ def GR8 : RegisterClass<"X86", [i8], 8, def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> { - let SubRegClassList = [GR8, GR8]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -422,7 +360,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { - let SubRegClassList = [GR8, GR8, GR16]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -477,7 +415,9 @@ def GR32 : RegisterClass<"X86", [i32], 32, def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { - let SubRegClassList = [GR8, GR8, GR16, GR32]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), + (GR16 sub_16bit), + (GR32 sub_32bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -511,14 +451,8 @@ def DEBUG_REG : RegisterClass<"X86", [i32], 32, } // Control registers. -def CONTROL_REG_32 : RegisterClass<"X86", [i32], 32, - [ECR0, ECR1, ECR2, ECR3, ECR4, ECR5, ECR6, - ECR7]> { -} - -def CONTROL_REG_64 : RegisterClass<"X86", [i64], 64, - [RCR0, RCR1, RCR2, RCR3, RCR4, RCR5, RCR6, - RCR7, RCR8]> { +def CONTROL_REG : RegisterClass<"X86", [i64], 64, + [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> { } // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of @@ -532,20 +466,27 @@ def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> { } def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { - let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H]; + let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)]; } def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> { - let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD]; + let SubRegClasses = [(GR8_ABCD_L sub_8bit), + (GR8_ABCD_H sub_8bit_hi), + (GR16_ABCD sub_16bit)]; } def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { - let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD]; + let SubRegClasses = [(GR8_ABCD_L sub_8bit), + (GR8_ABCD_H sub_8bit_hi), + (GR16_ABCD sub_16bit), + (GR32_ABCD sub_32bit)]; } def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> { - let SubRegClassList = [GR8, GR8, GR16]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; } def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R11]> { - let SubRegClassList = [GR8, GR8, GR16, GR32_TC]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), + (GR16 sub_16bit), + (GR32_TC sub_32bit)]; } // GR8_NOREX - GR8 registers which do not require a REX prefix. @@ -585,7 +526,7 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8, // GR16_NOREX - GR16 registers which do not require a REX prefix. 
def GR16_NOREX : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX]; + let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -608,7 +549,8 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, // GR32_NOREX - GR32 registers which do not require a REX prefix. def GR32_NOREX : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX]; + let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), + (GR16_NOREX sub_16bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -631,7 +573,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, // GR64_NOREX - GR64 registers which do not require a REX prefix. def GR64_NOREX : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), + (GR16_NOREX sub_16bit), + (GR32_NOREX sub_32bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -656,7 +600,7 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, def GR32_NOSP : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { - let SubRegClassList = [GR8, GR8, GR16]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -709,7 +653,9 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, def GR64_NOSP : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP]> { - let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP]; + let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), + (GR16 sub_16bit), + (GR32_NOSP sub_32bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -734,7 +680,9 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64, // GR64_NOREX_NOSP - GR64_NOREX registers except RSP. def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), + (GR16_NOREX sub_16bit), + (GR32_NOREX sub_32bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -758,7 +706,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, // A class to support the 'A' assembler constraint: EAX then EDX. def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> { - let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD]; + let SubRegClasses = [(GR8_ABCD_L sub_8bit), + (GR8_ABCD_H sub_8bit_hi), + (GR16_ABCD sub_16bit)]; } // Scalar SSE2 floating point registers. 
@@ -836,7 +786,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { - let SubRegClassList = [FR32, FR64]; + let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; + let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -856,7 +807,7 @@ def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15]> { - let SubRegClassList = [FR32, FR64, VR128]; + let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; } // Status flags registers. diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index cd87b82..6297a27 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -12,11 +12,232 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "x86-selectiondag-info" -#include "X86SelectionDAGInfo.h" +#include "X86TargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; -X86SelectionDAGInfo::X86SelectionDAGInfo() { +X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) : + TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<X86Subtarget>()), + TLI(*TM.getTargetLowering()) { } X86SelectionDAGInfo::~X86SelectionDAGInfo() { } + +SDValue +X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, + const Value *DstSV, + uint64_t DstSVOff) const { + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + + // If not DWORD aligned or size is more than the threshold, call the library. + // The libc version is likely to be faster for these cases. It can use the + // address value and run time information about the CPU. + if ((Align & 3) != 0 || + !ConstantSize || + ConstantSize->getZExtValue() > + Subtarget->getMaxInlineSizeThreshold()) { + SDValue InFlag(0, 0); + + // Check to see if there is a specialized entry-point for memory zeroing. + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); + + if (const char *bzeroEntry = V && + V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { + EVT IntPtr = TLI.getPointerTy(); + const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; + Entry.Ty = IntPtrTy; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + std::pair<SDValue,SDValue> CallResult = + TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, false, /*isReturnValueUsed=*/false, + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, + DAG, dl); + return CallResult.second; + } + + // Otherwise have the target-independent code call memset. + return SDValue(); + } + + uint64_t SizeVal = ConstantSize->getZExtValue(); + SDValue InFlag(0, 0); + EVT AVT; + SDValue Count; + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); + unsigned BytesLeft = 0; + bool TwoRepStos = false; + if (ValC) { + unsigned ValReg; + uint64_t Val = ValC->getZExtValue() & 255; + + // If the value is a constant, then we can potentially use larger sets. 
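+ // Worked example (an illustrative aside, not additional patch logic): with
+ // Val = 0xAB and a QWORD-aligned destination on x86-64, the replication
+ // below yields 0xABAB, then 0xABABABAB, then 0xABABABABABABABAB, so a
+ // single rep;stosq stores eight bytes per iteration; the SizeVal % 8
+ // trailing bytes are finished off by the BytesLeft path further down.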
+ switch (Align & 3) { + case 2: // WORD aligned + AVT = MVT::i16; + ValReg = X86::AX; + Val = (Val << 8) | Val; + break; + case 0: // DWORD aligned + AVT = MVT::i32; + ValReg = X86::EAX; + Val = (Val << 8) | Val; + Val = (Val << 16) | Val; + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned + AVT = MVT::i64; + ValReg = X86::RAX; + Val = (Val << 32) | Val; + } + break; + default: // Byte aligned + AVT = MVT::i8; + ValReg = X86::AL; + Count = DAG.getIntPtrConstant(SizeVal); + break; + } + + if (AVT.bitsGT(MVT::i8)) { + unsigned UBytes = AVT.getSizeInBits() / 8; + Count = DAG.getIntPtrConstant(SizeVal / UBytes); + BytesLeft = SizeVal % UBytes; + } + + Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), + InFlag); + InFlag = Chain.getValue(1); + } else { + AVT = MVT::i8; + Count = DAG.getIntPtrConstant(SizeVal); + Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); + InFlag = Chain.getValue(1); + } + + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : + X86::ECX, + Count, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : + X86::EDI, + Dst, InFlag); + InFlag = Chain.getValue(1); + + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); + + if (TwoRepStos) { + InFlag = Chain.getValue(1); + Count = Size; + EVT CVT = Count.getValueType(); + SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, + DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); + Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : + X86::ECX, + Left, InFlag); + InFlag = Chain.getValue(1); + Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; + Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); + } else if (BytesLeft) { + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + EVT AddrVT = Dst.getValueType(); + EVT SizeVT = Size.getValueType(); + + Chain = DAG.getMemset(Chain, dl, + DAG.getNode(ISD::ADD, dl, AddrVT, Dst, + DAG.getConstant(Offset, AddrVT)), + Src, + DAG.getConstant(BytesLeft, SizeVT), + Align, isVolatile, DstSV, DstSVOff + Offset); + } + + // TODO: Use a TokenFactor, as in memcpy, instead of a single chain. + return Chain; +} + +SDValue +X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + return SDValue(); + + // If not DWORD aligned, call the library. + if ((Align & 3) != 0) + return SDValue(); + + // DWORD aligned + EVT AVT = MVT::i32; + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned + AVT = MVT::i64; + + unsigned UBytes = AVT.getSizeInBits() / 8; + unsigned CountVal = SizeVal / UBytes; + SDValue Count = DAG.getIntPtrConstant(CountVal); + unsigned BytesLeft = SizeVal % UBytes; + + SDValue InFlag(0, 0); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ?
X86::RCX : + X86::ECX, + Count, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : + X86::EDI, + Dst, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : + X86::ESI, + Src, InFlag); + InFlag = Chain.getValue(1); + + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, + array_lengthof(Ops)); + + SmallVector<SDValue, 4> Results; + Results.push_back(RepMovs); + if (BytesLeft) { + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + EVT DstVT = Dst.getValueType(); + EVT SrcVT = Src.getValueType(); + EVT SizeVT = Size.getValueType(); + Results.push_back(DAG.getMemcpy(Chain, dl, + DAG.getNode(ISD::ADD, dl, DstVT, Dst, + DAG.getConstant(Offset, DstVT)), + DAG.getNode(ISD::ADD, dl, SrcVT, Src, + DAG.getConstant(Offset, SrcVT)), + DAG.getConstant(BytesLeft, SizeVT), + Align, isVolatile, AlwaysInline, + DstSV, DstSVOff + Offset, + SrcSV, SrcSVOff + Offset)); + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Results[0], Results.size()); +} diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h index 9834754..4f30f31 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.h +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -18,10 +18,40 @@ namespace llvm { +class X86TargetLowering; +class X86TargetMachine; +class X86Subtarget; + class X86SelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the X86Subtarget around so that we can + /// make the right decision when generating code for different targets. + const X86Subtarget *Subtarget; + + const X86TargetLowering &TLI; + public: - X86SelectionDAGInfo(); + explicit X86SelectionDAGInfo(const X86TargetMachine &TM); ~X86SelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, + const Value *DstSV, + uint64_t DstSVOff) const; + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; }; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f39904e..f2c5058 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -17,17 +17,13 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/Support/CommandLine.h" - using namespace llvm; -static cl::opt<bool> DisableSSEDomain("disable-sse-domain", - cl::init(false), cl::Hidden, - cl::desc("Disable SSE Domain Fixing")); - static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -43,6 +39,18 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } } +static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, + MCContext &Ctx, TargetAsmBackend &TAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll) 
{ + Triple TheTriple(TT); + switch (TheTriple.getOS()) { + default: + return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); + } +} + extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); @@ -63,6 +71,12 @@ extern "C" void LLVMInitializeX86Target() { createX86_32AsmBackend); TargetRegistry::RegisterAsmBackend(TheX86_64Target, createX86_64AsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterObjectStreamer(TheX86_32Target, + createMCStreamer); + TargetRegistry::RegisterObjectStreamer(TheX86_64Target, + createMCStreamer); } @@ -88,7 +102,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, Subtarget.getStackAlignment(), (Subtarget.isTargetWin64() ? -40 : (Subtarget.is64Bit() ? -8 : -4))), - InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { + InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this), + ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); // If no relocation model was picked, default as appropriate for the target. @@ -178,8 +193,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() && - !DisableSSEDomain) { + if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { PM.add(createSSEDomainFixPass()); return true; } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index dc4234c..f9fb424 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -23,6 +23,7 @@ #include "X86JITInfo.h" #include "X86Subtarget.h" #include "X86ISelLowering.h" +#include "X86SelectionDAGInfo.h" namespace llvm { @@ -35,6 +36,7 @@ class X86TargetMachine : public LLVMTargetMachine { X86InstrInfo InstrInfo; X86JITInfo JITInfo; X86TargetLowering TLInfo; + X86SelectionDAGInfo TSInfo; X86ELFWriterInfo ELFWriterInfo; Reloc::Model DefRelocModel; // Reloc model before it's overridden. @@ -54,6 +56,9 @@ public: virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } virtual const X86RegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 3990b8b..b230572 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -80,7 +80,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setShiftAmountType(MVT::i32); setStackPointerRegisterToSaveRestore(XCore::SP); - setSchedulingPreference(SchedulingForRegPressure); + setSchedulingPreference(Sched::RegPressure); // Use i32 for setcc operations results (slt, sgt, ...). 
setBooleanContents(ZeroOrOneBooleanContent); diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index c983112..5260258 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -361,9 +361,8 @@ bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == SrcRC) { if (DestRC == XCore::GRRegsRegisterClass) { @@ -395,7 +394,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -408,7 +408,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -419,7 +420,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) { return true; } @@ -437,7 +439,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MBB.addLiveIn(it->getReg()); storeRegToStackSlot(MBB, MI, it->getReg(), true, - it->getFrameIdx(), it->getRegClass()); + it->getFrameIdx(), it->getRegClass(), &RI); if (emitFrameMoves) { MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel); @@ -449,7 +451,8 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { bool AtStart = MI == MBB.begin(); MachineBasicBlock::iterator BeforeI = MI; @@ -460,7 +463,7 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(), - it->getRegClass()); + it->getRegClass(), &RI); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. 
loadRegFromStackSlot can insert multiple diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 3e0a765..9035ea9 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -67,25 +67,30 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool ReverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp index 6aac237..44aeb60 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "xcore-selectiondag-info" -#include "XCoreSelectionDAGInfo.h" +#include "XCoreTargetMachine.h" using namespace llvm; -XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() { +XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() { diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h index fd96716..0386968 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.h +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class XCoreTargetMachine; + class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo { public: - XCoreSelectionDAGInfo(); + explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM); ~XCoreSelectionDAGInfo(); }; diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 267f46a..b0013eb 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -28,7 +28,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, "i16:16:32-i32:32:32-i64:32:32-n32"), InstrInfo(), FrameInfo(*this), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 701a6f1..14073ba 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -20,6 +20,7 @@ #include "XCoreSubtarget.h" #include "XCoreInstrInfo.h" #include "XCoreISelLowering.h" +#include "XCoreSelectionDAGInfo.h" namespace llvm { @@ -29,6 +30,7 @@ class XCoreTargetMachine : 
public LLVMTargetMachine { XCoreInstrInfo InstrInfo; XCoreFrameInfo FrameInfo; XCoreTargetLowering TLInfo; + XCoreSelectionDAGInfo TSInfo; public: XCoreTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -40,6 +42,10 @@ public: return &TLInfo; } + + virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const TargetRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 6443dd4..692e47d 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -535,14 +535,14 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L, /// values (according to Uses) live as well. void DAE::MarkLive(const Function &F) { DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); - // Mark the function as live. - LiveFunctions.insert(&F); - // Mark all arguments as live. - for (unsigned i = 0, e = F.arg_size(); i != e; ++i) - PropagateLiveness(CreateArg(&F, i)); - // Mark all return values as live. - for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) - PropagateLiveness(CreateRet(&F, i)); + // Mark the function as live. + LiveFunctions.insert(&F); + // Mark all arguments as live. + for (unsigned i = 0, e = F.arg_size(); i != e; ++i) + PropagateLiveness(CreateArg(&F, i)); + // Mark all return values as live. + for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) + PropagateLiveness(CreateRet(&F, i)); } /// MarkLive - Mark the given return value or argument as live. Additionally, @@ -859,7 +859,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Value *RetVal; - if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) { + if (NFTy->getReturnType()->isVoidTy()) { RetVal = 0; } else { assert (RetTy->isStructTy()); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index bc8028c..8e312e7 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -54,6 +54,9 @@ namespace { return removeDeadFunctions(CG, &NeverInline); } virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } }; } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 46cf4b2..74b4a1c 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -49,6 +49,9 @@ namespace { CA.growCachedCostInfo(Caller, Callee); } virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } }; } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index b07e22c..622a9b5 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -17,32 +17,55 @@ // important that the hash function be high quality. The equality comparison // iterates through each instruction in each basic block. // -// When a match is found, the functions are folded. We can only fold two -// functions when we know that the definition of one of them is not -// overridable. +// When a match is found, the functions are folded. If both functions are +// overridable, we move the functionality into a new internal function and +// leave two overridable thunks to it.
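In source-level terms, the new overridable-vs-overridable case behaves roughly as follows (a sketch of the transformation's effect, with invented function names; not code from the pass):

    // Before (conceptually): two identical, link-time-replaceable bodies.
    //   int f(int x) { return x * 2 + 1; }
    //   int g(int x) { return x * 2 + 1; }

    // After: one internal copy of the body, with f and g kept as
    // overridable thunks that forward to it.
    static int merged(int x) { return x * 2 + 1; }
    int f(int x) { return merged(x); }
    int g(int x) { return merged(x); }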
// //===----------------------------------------------------------------------===// // // Future work: // -// * fold vector<T*>::push_back and vector<S*>::push_back. -// -// These two functions have different types, but in a way that doesn't matter -// to us. As long as we never see an S or T itself, using S* and S** is the -// same as using a T* and T**. -// // * virtual functions. // // Many functions have their address taken by the virtual function table for // the object they belong to. However, as long as it's only used for a lookup // and call, this is irrelevant, and we'd like to fold such implementations. // +// * use SCC to cut down on pair-wise comparisons and solve larger cycles. +// +// The current implementation loops over a pair-wise comparison of all +// functions in the program, where the two functions in the pair are assumed +// to be equal until proven otherwise. We could both use fewer comparisons +// and optimize more complex cases if we used strongly connected components +// of the call graph. +// +// * be smarter about bitcast. +// +// In order to fold functions, we will sometimes add either bitcast instructions +// or bitcast constant expressions. Unfortunately, this can confound further +// analysis since the two functions differ where one has a bitcast and the +// other doesn't. We should learn to peer through bitcasts without imposing bad +// performance properties. +// +// * don't emit aliases for Mach-O. +// +// Mach-O doesn't support aliases, which means that we must avoid introducing +// them in the bitcode on architectures which don't support them, such as +// Mac OS X. There are a few approaches to this problem: +// a) teach codegen to lower global aliases to thunks on platforms which don't +// support them. +// b) always emit thunks, and create a separate thunk-to-alias pass which +// runs on ELF systems. This has the added benefit of transforming other +// thunks such as those produced by a C++ frontend into aliases when legal +// to do so.
+// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/InlineAsm.h" @@ -54,6 +77,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" #include <map> #include <vector> using namespace llvm; @@ -61,17 +85,33 @@ using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); namespace { - struct MergeFunctions : public ModulePass { + class MergeFunctions : public ModulePass { + public: static char ID; // Pass identification, replacement for typeid MergeFunctions() : ModulePass(&ID) {} bool runOnModule(Module &M); + + private: + bool isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2); + + bool equals(const BasicBlock *BB1, const BasicBlock *BB2); + bool equals(const Function *F, const Function *G); + + bool compare(const Value *V1, const Value *V2); + + const Function *LHS, *RHS; + typedef DenseMap<const Value *, unsigned long> IDMap; + IDMap Map; + DenseMap<const Function *, IDMap> Domains; + DenseMap<const Function *, unsigned long> DomainCount; + TargetData *TD; }; } char MergeFunctions::ID = 0; -static RegisterPass<MergeFunctions> -X("mergefunc", "Merge Functions"); +static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions"); ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); @@ -95,15 +135,6 @@ static unsigned long hash(const Function *F) { return ID.ComputeHash(); } -/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by -/// walking the chain of cast operands. Otherwise, returns the argument. -static Value* IgnoreBitcasts(Value *V) { - while (BitCastInst *BC = dyn_cast<BitCastInst>(V)) - V = BC->getOperand(0); - - return V; -} - /// isEquivalentType - any two pointers are equivalent. Otherwise, standard /// type equivalence rules apply. static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { @@ -113,6 +144,14 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return false; switch(Ty1->getTypeID()) { + default: + llvm_unreachable("Unknown type!"); + // Fall through in Release-Asserts mode. + case Type::IntegerTyID: + case Type::OpaqueTyID: + // Ty1 == Ty2 would have returned true earlier. + return false; + case Type::VoidTyID: case Type::FloatTyID: case Type::DoubleTyID: @@ -123,15 +162,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { case Type::MetadataTyID: return true; - case Type::IntegerTyID: - case Type::OpaqueTyID: - // Ty1 == Ty2 would have returned true earlier. - return false; - - default: - llvm_unreachable("Unknown type!"); - return false; - case Type::PointerTyID: { const PointerType *PTy1 = cast<PointerType>(Ty1); const PointerType *PTy2 = cast<PointerType>(Ty2); @@ -154,6 +184,21 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return true; } + case Type::UnionTyID: { + const UnionType *UTy1 = cast<UnionType>(Ty1); + const UnionType *UTy2 = cast<UnionType>(Ty2); + + // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc. 
+ if (UTy1->getNumElements() != UTy2->getNumElements()) + return false; + + for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i))) + return false; + } + return true; + } + case Type::FunctionTyID: { const FunctionType *FTy1 = cast<FunctionType>(Ty1); const FunctionType *FTy2 = cast<FunctionType>(Ty2); @@ -236,123 +281,136 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) { return true; } -static bool compare(const Value *V, const Value *U) { - assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) && - "Must not compare basic blocks."); - - assert(isEquivalentType(V->getType(), U->getType()) && - "Two of the same operation have operands of different type."); +bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2) { + if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) { + SmallVector<Value *, 8> Indices1, Indices2; + for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(), + E = GEP1->idx_end(); I != E; ++I) { + Indices1.push_back(*I); + } + for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(), + E = GEP2->idx_end(); I != E; ++I) { + Indices2.push_back(*I); + } + uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), + Indices1.data(), Indices1.size()); + uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), + Indices2.data(), Indices2.size()); + return Offset1 == Offset2; + } - // TODO: If the constant is an expression of F, we should accept that it's - // equal to the same expression in terms of G. - if (isa<Constant>(V)) - return V == U; + // Equivalent types aren't enough. + if (GEP1->getPointerOperand()->getType() != + GEP2->getPointerOperand()->getType()) + return false; - // The caller has ensured that ValueMap[V] != U. Since Arguments are - // pre-loaded into the ValueMap, and Instructions are added as we go, we know - // that this can only be a mis-match. - if (isa<Instruction>(V) || isa<Argument>(V)) + if (GEP1->getNumOperands() != GEP2->getNumOperands()) return false; - if (isa<InlineAsm>(V) && isa<InlineAsm>(U)) { - const InlineAsm *IAF = cast<InlineAsm>(V); - const InlineAsm *IAG = cast<InlineAsm>(U); - return IAF->getAsmString() == IAG->getAsmString() && - IAF->getConstraintString() == IAG->getConstraintString(); + for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { + if (!compare(GEP1->getOperand(i), GEP2->getOperand(i))) + return false; } - return false; + return true; } -static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, - DenseMap<const Value *, const Value *> &ValueMap, - DenseMap<const Value *, const Value *> &SpeculationMap) { - // Speculatively add it anyways. If it's false, we'll notice a difference - // later, and this won't matter. 
- ValueMap[BB1] = BB2; +bool MergeFunctions::compare(const Value *V1, const Value *V2) { + if (V1 == LHS || V1 == RHS) + if (V2 == LHS || V2 == RHS) + return true; - BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); - BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); + // TODO: constant expressions in terms of LHS and RHS + if (isa<Constant>(V1)) + return V1 == V2; - do { - if (isa<BitCastInst>(FI)) { - ++FI; - continue; - } - if (isa<BitCastInst>(GI)) { - ++GI; - continue; - } + if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) { + const InlineAsm *IA1 = cast<InlineAsm>(V1); + const InlineAsm *IA2 = cast<InlineAsm>(V2); + return IA1->getAsmString() == IA2->getAsmString() && + IA1->getConstraintString() == IA2->getConstraintString(); + } - if (!isEquivalentOperation(FI, GI)) - return false; + // We enumerate constants globally and arguments, basic blocks or + // instructions within the function they belong to. + const Function *Domain1 = NULL; + if (const Argument *A = dyn_cast<Argument>(V1)) { + Domain1 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) { + Domain1 = BB->getParent(); + } else if (const Instruction *I = dyn_cast<Instruction>(V1)) { + Domain1 = I->getParent()->getParent(); + } - if (isa<GetElementPtrInst>(FI)) { - const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI); - const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI); - if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) { - // It's effectively a bitcast. - ++FI, ++GI; - continue; - } + const Function *Domain2 = NULL; + if (const Argument *A = dyn_cast<Argument>(V2)) { + Domain2 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) { + Domain2 = BB->getParent(); + } else if (const Instruction *I = dyn_cast<Instruction>(V2)) { + Domain2 = I->getParent()->getParent(); + } - // TODO: we only really care about the elements before the index - if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType()) + if (Domain1 != Domain2) + if (Domain1 != LHS && Domain1 != RHS) + if (Domain2 != LHS && Domain2 != RHS) return false; - } - if (ValueMap[FI] == GI) { - ++FI, ++GI; - continue; - } + IDMap &Map1 = Domains[Domain1]; + unsigned long &ID1 = Map1[V1]; + if (!ID1) + ID1 = ++DomainCount[Domain1]; - if (ValueMap[FI] != NULL) - return false; + IDMap &Map2 = Domains[Domain2]; + unsigned long &ID2 = Map2[V2]; + if (!ID2) + ID2 = ++DomainCount[Domain2]; - for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - Value *OpF = IgnoreBitcasts(FI->getOperand(i)); - Value *OpG = IgnoreBitcasts(GI->getOperand(i)); + return ID1 == ID2; +} - if (ValueMap[OpF] == OpG) - continue; +bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) { + BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); + BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); - if (ValueMap[OpF] != NULL) + do { + if (!compare(FI, GI)) + return false; + + if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) { + const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI); + const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI); + + if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand())) return false; - if (OpF->getValueID() != OpG->getValueID() || - !isEquivalentType(OpF->getType(), OpG->getType())) + if (!isEquivalentGEP(GEP1, GEP2)) + return false; + } else { + if (!isEquivalentOperation(FI, GI)) return false; - if (isa<PHINode>(FI)) { - if (SpeculationMap[OpF] == NULL) - SpeculationMap[OpF] = OpG; - 
else if (SpeculationMap[OpF] != OpG) - return false; - continue; - } else if (isa<BasicBlock>(OpF)) { - assert(isa<TerminatorInst>(FI) && - "BasicBlock referenced by non-Terminator non-PHI"); - // This call changes the ValueMap, hence we can't use - // Value *& = ValueMap[...] - if (!equals(cast<BasicBlock>(OpF), cast<BasicBlock>(OpG), ValueMap, - SpeculationMap)) - return false; - } else { + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + Value *OpF = FI->getOperand(i); + Value *OpG = GI->getOperand(i); + if (!compare(OpF, OpG)) return false; - } - ValueMap[OpF] = OpG; + if (OpF->getValueID() != OpG->getValueID() || + !isEquivalentType(OpF->getType(), OpG->getType())) + return false; + } } - ValueMap[FI] = GI; ++FI, ++GI; } while (FI != FE && GI != GE); return FI == FE && GI == GE; } -static bool equals(const Function *F, const Function *G) { +bool MergeFunctions::equals(const Function *F, const Function *G) { // We need to recheck everything, but check the things that weren't included // in the hash first. @@ -382,27 +440,46 @@ static bool equals(const Function *F, const Function *G) { if (!isEquivalentType(F->getFunctionType(), G->getFunctionType())) return false; - DenseMap<const Value *, const Value *> ValueMap; - DenseMap<const Value *, const Value *> SpeculationMap; - ValueMap[F] = G; - assert(F->arg_size() == G->arg_size() && "Identical functions have a different number of args."); - for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), - fe = F->arg_end(); fi != fe; ++fi, ++gi) - ValueMap[fi] = gi; + LHS = F; + RHS = G; - if (!equals(&F->getEntryBlock(), &G->getEntryBlock(), ValueMap, - SpeculationMap)) - return false; + // Visit the arguments so that they get enumerated in the order they're + // passed in. + for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), + fe = F->arg_end(); fi != fe; ++fi, ++gi) { + if (!compare(fi, gi)) + llvm_unreachable("Arguments repeat"); + } - for (DenseMap<const Value *, const Value *>::iterator - I = SpeculationMap.begin(), E = SpeculationMap.end(); I != E; ++I) { - if (ValueMap[I->first] != I->second) + SmallVector<const BasicBlock *, 8> FBBs, GBBs; + SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F. + FBBs.push_back(&F->getEntryBlock()); + GBBs.push_back(&G->getEntryBlock()); + VisitedBBs.insert(FBBs[0]); + while (!FBBs.empty()) { + const BasicBlock *FBB = FBBs.pop_back_val(); + const BasicBlock *GBB = GBBs.pop_back_val(); + if (!compare(FBB, GBB) || !equals(FBB, GBB)) { + Domains.clear(); + DomainCount.clear(); return false; + } + const TerminatorInst *FTI = FBB->getTerminator(); + const TerminatorInst *GTI = GBB->getTerminator(); + assert(FTI->getNumSuccessors() == GTI->getNumSuccessors()); + for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(FTI->getSuccessor(i))) + continue; + FBBs.push_back(FTI->getSuccessor(i)); + GBBs.push_back(GTI->getSuccessor(i)); + } } + Domains.clear(); + DomainCount.clear(); return true; } @@ -476,20 +553,32 @@ static LinkageCategory categorize(const Function *F) { } static void ThunkGToF(Function *F, Function *G) { + if (!G->mayBeOverridden()) { + // Redirect direct callers of G to F. 
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); + UI != UE;) { + Value::use_iterator TheIter = UI; + ++UI; + CallSite CS(*TheIter); + if (CS && CS.isCallee(TheIter)) + TheIter.getUse().set(BitcastF); + } + } + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", G->getParent()); BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); - std::vector<Value *> Args; + SmallVector<Value *, 16> Args; unsigned i = 0; const FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { - if (FFTy->getParamType(i) == AI->getType()) + if (FFTy->getParamType(i) == AI->getType()) { Args.push_back(AI); - else { - Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB); - Args.push_back(BCI); + } else { + Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB)); } ++i; } @@ -510,8 +599,6 @@ static void ThunkGToF(Function *F, Function *G) { NewG->takeName(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); - - // TODO: look at direct callers to G and make them all direct callers to F. } static void AliasGToF(Function *F, Function *G) { @@ -542,67 +629,66 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { } switch (catF) { + case ExternalStrong: + switch (catG) { case ExternalStrong: - switch (catG) { - case ExternalStrong: - case ExternalWeak: - ThunkGToF(F, G); - break; - case Internal: - if (G->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - } + case ExternalWeak: + ThunkGToF(F, G); break; + case Internal: + if (G->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + } + break; - case ExternalWeak: { - assert(catG == ExternalWeak); + case ExternalWeak: { + assert(catG == ExternalWeak); - // Make them both thunks to the same internal function. - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", - F->getParent()); - H->copyAttributesFrom(F); - H->takeName(F); - F->replaceAllUsesWith(H); + // Make them both thunks to the same internal function. 
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + F->replaceAllUsesWith(H); - ThunkGToF(F, G); - ThunkGToF(F, H); + ThunkGToF(F, G); + ThunkGToF(F, H); - F->setLinkage(GlobalValue::InternalLinkage); - } break; + F->setLinkage(GlobalValue::InternalLinkage); + } break; - case Internal: - switch (catG) { - case ExternalStrong: - llvm_unreachable(0); - // fall-through - case ExternalWeak: - if (F->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - case Internal: { - bool addrTakenF = F->hasAddressTaken(); - bool addrTakenG = G->hasAddressTaken(); - if (!addrTakenF && addrTakenG) { - std::swap(FnVec[i], FnVec[j]); - std::swap(F, G); - std::swap(addrTakenF, addrTakenG); - } + case Internal: + switch (catG) { + case ExternalStrong: + llvm_unreachable(0); + // fall-through + case ExternalWeak: + if (F->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + case Internal: { + bool addrTakenF = F->hasAddressTaken(); + bool addrTakenG = G->hasAddressTaken(); + if (!addrTakenF && addrTakenG) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(addrTakenF, addrTakenG); + } - if (addrTakenF && addrTakenG) { - ThunkGToF(F, G); - } else { - assert(!addrTakenG); - AliasGToF(F, G); - } - } break; + if (addrTakenF && addrTakenG) { + ThunkGToF(F, G); + } else { + assert(!addrTakenG); + AliasGToF(F, G); } - break; + } break; + } break; } ++NumFunctionsMerged; @@ -619,22 +705,20 @@ bool MergeFunctions::runOnModule(Module &M) { std::map<unsigned long, std::vector<Function *> > FnMap; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration() || F->isIntrinsic()) + if (F->isDeclaration()) continue; FnMap[hash(F)].push_back(F); } - // TODO: instead of running in a loop, we could also fold functions in - // callgraph order. Constructing the CFG probably isn't cheaper than just - // running in a loop, unless it happened to already be available. 
+ TD = getAnalysisIfAvailable<TargetData>(); bool LocalChanged; do { LocalChanged = false; DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); for (std::map<unsigned long, std::vector<Function *> >::iterator - I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { + I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { std::vector<Function *> &FnVec = I->second; DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 310e4a2..6bc8e66 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -229,6 +229,12 @@ static bool StripDebugInfo(Module &M) { NMD->eraseFromParent(); } + NMD = M.getNamedMetadata("llvm.dbg.lv"); + if (NMD) { + Changed = true; + NMD->eraseFromParent(); + } + unsigned MDDbgKind = M.getMDKindID("dbg"); for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index bd06499..c7b04a4 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -51,7 +51,7 @@ static inline unsigned getComplexity(Value *V) { /// InstCombineIRInserter - This is an IRBuilder insertion helper that works /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. -class VISIBILITY_HIDDEN InstCombineIRInserter +class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter : public IRBuilderDefaultInserter<true> { InstCombineWorklist &Worklist; public: @@ -65,7 +65,7 @@ public: }; /// InstCombiner - The -instcombine pass. -class VISIBILITY_HIDDEN InstCombiner +class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index eb7628e..b0137c4 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -442,7 +442,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // If this cast is a truncate, evaluating in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid cast: " << CI); + " to avoid cast: " << CI << '\n'); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); return ReplaceInstUsesWith(CI, Res); @@ -1252,6 +1252,64 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { return commonPointerCastTransforms(CI); } +/// OptimizeVectorResize - This input value (which is known to have vector type) +/// is being zero extended or truncated to the specified vector type. Try to +/// replace it with a shuffle (and vector/vector bitcast) if possible. +/// +/// The source and destination vector types may have different element types. +static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, + InstCombiner &IC) { + // We can only do this optimization if the output is a multiple of the input + // element size, or the input is a multiple of the output element size. + // Convert the input type to have the same element type as the output.
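To make the shuffle construction that follows concrete: when growing, every source lane is kept and the excess destination lanes are taken from a zero vector; when shrinking, only the low destination-count lanes are kept. A sketch of just the mask computation (hypothetical helper; the real code builds a ConstantVector of i32s):

    #include <vector>

    // Mask for resizing a SrcElts-lane vector to DstElts lanes, where the
    // shuffle's second operand is a zero vector whose lanes are numbered
    // starting at SrcElts in the concatenated operand numbering.
    static std::vector<unsigned> MakeResizeMask(unsigned SrcElts,
                                                unsigned DstElts) {
      std::vector<unsigned> Mask;
      unsigned Keep = SrcElts < DstElts ? SrcElts : DstElts;
      for (unsigned i = 0; i != Keep; ++i)
        Mask.push_back(i);        // lanes copied from the input vector
      for (unsigned i = SrcElts; i < DstElts; ++i)
        Mask.push_back(SrcElts);  // excess lanes read lane 0 of the zeros
      return Mask;
    }

For example, resizing <2 x i32> up to <4 x i32> produces the mask <0, 1, 2, 2>, i.e. both input lanes followed by two zero lanes.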
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType()); + + if (SrcTy->getElementType() != DestTy->getElementType()) { + // The input types don't need to be identical, but for now they must be the + // same size. There is no specific reason we couldn't handle things like + // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten + // there yet. + if (SrcTy->getElementType()->getPrimitiveSizeInBits() != + DestTy->getElementType()->getPrimitiveSizeInBits()) + return 0; + + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); + InVal = IC.Builder->CreateBitCast(InVal, SrcTy); + } + + // Now that the element types match, get the shuffle mask and RHS of the + // shuffle to use, which depends on whether we're increasing or decreasing the + // size of the input. + SmallVector<Constant*, 16> ShuffleMask; + Value *V2; + const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); + + if (SrcTy->getNumElements() > DestTy->getNumElements()) { + // If we're shrinking the number of elements, just shuffle in the low + // elements from the input and use undef as the second shuffle input. + V2 = UndefValue::get(SrcTy); + for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) + ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + + } else { + // If we're increasing the number of elements, shuffle in all of the + // elements from InVal and fill the rest of the result elements with zeros + // from a constant zero. + V2 = Constant::getNullValue(SrcTy); + unsigned SrcElts = SrcTy->getNumElements(); + for (unsigned i = 0, e = SrcElts; i != e; ++i) + ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + + // The excess elements reference the first element of the zero input. + ShuffleMask.append(DestTy->getNumElements()-SrcElts, + ConstantInt::get(Int32Ty, SrcElts)); + } + + Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size()); + return new ShuffleVectorInst(InVal, V2, Mask); +} + + Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. @@ -1310,6 +1368,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } + + // If this is a cast from an integer to vector, check to see if the input + // is a trunc or zext of a bitcast from vector. If so, we can replace all + // the casts with a shuffle and (potentially) a bitcast. + if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){ + CastInst *SrcCast = cast<CastInst>(Src); + if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) + if (isa<VectorType>(BCIn->getOperand(0)->getType())) + if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + cast<VectorType>(DestTy), *this)) + return I; + } } if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 9d88621..9100a85 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -22,7 +22,7 @@ namespace llvm { /// InstCombineWorklist - This is the worklist management logic for /// InstCombine. 
-class VISIBILITY_HIDDEN InstCombineWorklist { +class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { SmallVector<Instruction*, 256> Worklist; DenseMap<Instruction*, unsigned> WorklistMap; diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 5778864..1a3b10c 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_library(LLVMScalarOpts SimplifyCFGPass.cpp SimplifyHalfPowrLibCalls.cpp SimplifyLibCalls.cpp + Sink.cpp TailDuplication.cpp TailRecursionElimination.cpp ) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 65b34b1..ca8ab49 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -868,7 +868,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, const Type *StoredValTy = StoredVal->getType(); - uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. @@ -1132,8 +1132,8 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); - uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; - uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; + uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8; IRBuilder<> Builder(InsertPt->getParent(), InsertPt); @@ -1604,7 +1604,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } if (!NeedToSplit.empty()) { - toSplit.append(NeedToSplit.size(), NeedToSplit.front()); + toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); return false; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index cf3d16f..86ea3eb 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -107,11 +107,13 @@ namespace { class RegUseTracker { typedef DenseMap<const SCEV *, RegSortData> RegUsesTy; - RegUsesTy RegUses; + RegUsesTy RegUsesMap; SmallVector<const SCEV *, 16> RegSequence; public: void CountRegister(const SCEV *Reg, size_t LUIdx); + void DropRegister(const SCEV *Reg, size_t LUIdx); + void DropUse(size_t LUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -132,7 +134,7 @@ public: void RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { std::pair<RegUsesTy::iterator, bool> Pair = - RegUses.insert(std::make_pair(Reg, RegSortData())); + RegUsesMap.insert(std::make_pair(Reg, RegSortData())); RegSortData &RSD = Pair.first->second; if (Pair.second) RegSequence.push_back(Reg); @@ -140,11 +142,28 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { RSD.UsedByIndices.set(LUIdx); } +void +RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { + RegUsesTy::iterator It = RegUsesMap.find(Reg); + assert(It != RegUsesMap.end()); + RegSortData &RSD = It->second; + assert(RSD.UsedByIndices.size() > LUIdx); + RSD.UsedByIndices.reset(LUIdx); +} + +void +RegUseTracker::DropUse(size_t LUIdx) { + // Remove the use index from every register's use list. 
+ for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end(); + I != E; ++I) + I->second.UsedByIndices.reset(LUIdx); +} + bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUses.count(Reg)) return false; + if (!RegUsesMap.count(Reg)) return false; const SmallBitVector &UsedByIndices = - RegUses.find(Reg)->second.UsedByIndices; + RegUsesMap.find(Reg)->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -152,13 +171,13 @@ RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { } const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { - RegUsesTy::const_iterator I = RegUses.find(Reg); - assert(I != RegUses.end() && "Unknown register!"); + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + assert(I != RegUsesMap.end() && "Unknown register!"); return I->second.UsedByIndices; } void RegUseTracker::clear() { - RegUses.clear(); + RegUsesMap.clear(); RegSequence.clear(); } @@ -188,6 +207,8 @@ struct Formula { unsigned getNumRegs() const; const Type *getType() const; + void DeleteBaseReg(const SCEV *&S); + bool referencesReg(const SCEV *S) const; bool hasRegsUsedByUsesOtherThan(size_t LUIdx, const RegUseTracker &RegUses) const; @@ -291,6 +312,13 @@ const Type *Formula::getType() const { 0; } +/// DeleteBaseReg - Delete the given base reg from the BaseRegs list. +void Formula::DeleteBaseReg(const SCEV *&S) { + if (&S != &BaseRegs.back()) + std::swap(S, BaseRegs.back()); + BaseRegs.pop_back(); +} + /// referencesReg - Test if this formula references the given register. bool Formula::referencesReg(const SCEV *S) const { return S == ScaledReg || @@ -326,6 +354,13 @@ void Formula::print(raw_ostream &OS) const { if (!First) OS << " + "; else First = false; OS << "reg(" << **I << ')'; } + if (AM.HasBaseReg && BaseRegs.empty()) { + if (!First) OS << " + "; else First = false; + OS << "**error: HasBaseReg**"; + } else if (!AM.HasBaseReg && !BaseRegs.empty()) { + if (!First) OS << " + "; else First = false; + OS << "**error: !HasBaseReg**"; + } if (AM.Scale != 0) { if (!First) OS << " + "; else First = false; OS << AM.Scale << "*reg("; @@ -345,8 +380,7 @@ void Formula::dump() const { /// without changing its value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(AR->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); } @@ -354,8 +388,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { /// without changing its value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(A->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -363,8 +396,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { /// without changing its value. 
static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(A->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -432,14 +464,14 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, bool Found = false; for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end(); I != E; ++I) { + const SCEV *S = *I; if (!Found) - if (const SCEV *Q = getExactSDiv(*I, RHS, SE, + if (const SCEV *Q = getExactSDiv(S, RHS, SE, IgnoreSignificantBits)) { - Ops.push_back(Q); + S = Q; Found = true; - continue; } - Ops.push_back(*I); + Ops.push_back(S); } return Found ? SE.getMulExpr(Ops) : 0; } @@ -810,8 +842,7 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), - LUIdx(~size_t(0)), Offset(0) {} + : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} /// isUseFullyOutsideLoop - Test whether this fixup always uses its /// value outside of the given loop. @@ -934,7 +965,10 @@ public: MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true) {} + bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); + void DeleteFormula(Formula &F); + void RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses); void check() const; @@ -942,6 +976,16 @@ public: void dump() const; }; +/// HasFormulaWithSameRegs - Test whether this use has a formula which has +/// the same registers as the given formula. +bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { + SmallVector<const SCEV *, 2> Key = F.BaseRegs; + if (F.ScaledReg) Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for uniquifying. + std::sort(Key.begin(), Key.end()); + return Uniquifier.count(Key); +} + /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { @@ -972,6 +1016,33 @@ bool LSRUse::InsertFormula(const Formula &F) { return true; } +/// DeleteFormula - Remove the given formula from this use's list. +void LSRUse::DeleteFormula(Formula &F) { + if (&F != &Formulae.back()) + std::swap(F, Formulae.back()); + Formulae.pop_back(); + assert(!Formulae.empty() && "LSRUse has no formulae left!"); +} + +/// RecomputeRegs - Recompute the Regs field, and update RegUses. +void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { + // Now that we've filtered out some formulae, recompute the Regs set. + SmallPtrSet<const SCEV *, 4> OldRegs = Regs; + Regs.clear(); + for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(), + E = Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.ScaledReg) Regs.insert(F.ScaledReg); + Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + } + + // Update the RegTracker. + for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(), + E = OldRegs.end(); I != E; ++I) + if (!Regs.count(*I)) + RegUses.DropRegister(*I, LUIdx); +} + void LSRUse::print(raw_ostream &OS) const { OS << "LSR Use: Kind="; switch (Kind) { @@ -1091,6 +1162,13 @@ static bool isAlwaysFoldable(int64_t BaseOffs, AM.HasBaseReg = HasBaseReg; AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + // Canonicalize a scale of 1 to a base register if the formula doesn't + // already have a base register.
+ if (!AM.HasBaseReg && AM.Scale == 1) { + AM.Scale = 0; + AM.HasBaseReg = true; + } + return isLegalUse(AM, Kind, AccessTy, TLI); } @@ -1186,7 +1264,7 @@ class LSRInstance { void OptimizeShadowIV(); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); - bool OptimizeLoopTermCond(); + void OptimizeLoopTermCond(); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1200,13 +1278,17 @@ class LSRInstance { typedef DenseMap<const SCEV *, size_t> UseMapTy; UseMapTy UseMap; - bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, LSRUse::KindType Kind, const Type *AccessTy); std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind, const Type *AccessTy); + void DeleteUse(LSRUse &LU); + + LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); + public: void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); @@ -1227,6 +1309,8 @@ public: void GenerateAllReuseFormulae(); void FilterOutUndesirableDedicatedRegisters(); + + size_t EstimateSearchSpaceComplexity() const; void NarrowSearchSpaceUsingHeuristics(); void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, @@ -1375,6 +1459,7 @@ void LSRInstance::OptimizeShadowIV() { /* Remove cast operation */ ShadowUse->replaceAllUsesWith(NewPH); ShadowUse->eraseFromParent(); + Changed = true; break; } } @@ -1382,8 +1467,7 @@ void LSRInstance::OptimizeShadowIV() { /// FindIVUserForCond - If Cond has an operand that is an expression of an IV, /// set the IV user and stride information and return true, otherwise return /// false. -bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, - IVStrideUse *&CondUse) { +bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) if (UI->getUser() == Cond) { // NOTE: we could handle setcc instructions with multiple uses here, but @@ -1555,7 +1639,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { /// OptimizeLoopTermCond - Change loop terminating condition to use the /// postinc iv when possible. -bool +void LSRInstance::OptimizeLoopTermCond() { SmallPtrSet<Instruction *, 4> PostIncs; @@ -1621,13 +1705,13 @@ LSRInstance::OptimizeLoopTermCond() { } if (const SCEVConstant *D = dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { + const ConstantInt *C = D->getValue(); // Stride of one or negative one can have reuse with non-addresses. - if (D->getValue()->isOne() || - D->getValue()->isAllOnesValue()) + if (C->isOne() || C->isAllOnesValue()) goto decline_post_inc; // Avoid weird situations. - if (D->getValue()->getValue().getMinSignedBits() >= 64 || - D->getValue()->getValue().isMinSignedValue()) + if (C->getValue().getMinSignedBits() >= 64 || + C->getValue().isMinSignedValue()) goto decline_post_inc; // Without TLI, assume that any stride might be valid, and so any // use might be shared. @@ -1636,7 +1720,7 @@ LSRInstance::OptimizeLoopTermCond() { // Check for possible scaled-address reuse. 
const Type *AccessTy = getAccessType(UI->getUser());
       TargetLowering::AddrMode AM;
-      AM.Scale = D->getValue()->getSExtValue();
+      AM.Scale = C->getSExtValue();
       if (TLI->isLegalAddressingMode(AM, AccessTy))
         goto decline_post_inc;
       AM.Scale = -AM.Scale;
@@ -1691,12 +1775,13 @@ LSRInstance::OptimizeLoopTermCond() {
     else if (BB != IVIncInsertPos->getParent())
       IVIncInsertPos = BB->getTerminator();
   }
-
-  return Changed;
 }

+/// reconcileNewOffset - Determine if the given use can accommodate a fixup
+/// at the given offset and other details. If so, update the use and
+/// return true.
 bool
-LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
                                 LSRUse::KindType Kind, const Type *AccessTy) {
   int64_t NewMinOffset = LU.MinOffset;
   int64_t NewMaxOffset = LU.MaxOffset;
@@ -1709,12 +1794,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
     return false;
   // Conservatively assume HasBaseReg is true for now.
   if (NewOffset < LU.MinOffset) {
-    if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
+    if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
                           Kind, AccessTy, TLI))
       return false;
     NewMinOffset = NewOffset;
   } else if (NewOffset > LU.MaxOffset) {
-    if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
+    if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
                           Kind, AccessTy, TLI))
       return false;
     NewMaxOffset = NewOffset;
@@ -1753,7 +1838,7 @@ LSRInstance::getUse(const SCEV *&Expr,
     // A use already existed with this base.
     size_t LUIdx = P.first->second;
     LSRUse &LU = Uses[LUIdx];
-    if (reconcileNewOffset(LU, Offset, Kind, AccessTy))
+    if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
       // Reuse this use.
       return std::make_pair(LUIdx, Offset);
   }
@@ -1774,6 +1859,47 @@ LSRInstance::getUse(const SCEV *&Expr,
   return std::make_pair(LUIdx, Offset);
 }

+/// DeleteUse - Delete the given use from the Uses list.
+void LSRInstance::DeleteUse(LSRUse &LU) {
+  if (&LU != &Uses.back())
+    std::swap(LU, Uses.back());
+  Uses.pop_back();
+}
+
+/// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
+/// a formula with the same registers as the given formula.
+LSRUse *
+LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
+                                       const LSRUse &OrigLU) {
+  // Search all uses for the formula. This could be more clever. Ignore
+  // ICmpZero uses because they may contain formulae generated by
+  // GenerateICmpZeroScales, in which case adding fixup offsets may
+  // be invalid.
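// (Illustrative aside, not part of the patch.) The HasFormulaWithSameRegs
// test that the search below leans on reduces each formula to a sorted key of
// its registers; two formulae "have the same registers" exactly when their
// keys compare equal. A self-contained sketch, with Reg standing in for
// const SCEV *:
#include <algorithm>
#include <vector>

typedef const void *Reg;

static std::vector<Reg> makeRegKey(std::vector<Reg> BaseRegs, Reg ScaledReg) {
  if (ScaledReg)
    BaseRegs.push_back(ScaledReg);
  // Host pointer order is unstable across runs, but that is fine for a key
  // used only for uniquifying, as the patch's own comment notes.
  std::sort(BaseRegs.begin(), BaseRegs.end());
  return BaseRegs;
}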
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + if (&LU != &OrigLU && + LU.Kind != LSRUse::ICmpZero && + LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && + LU.HasFormulaWithSameRegs(OrigF)) { + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.BaseRegs == OrigF.BaseRegs && + F.ScaledReg == OrigF.ScaledReg && + F.AM.BaseGV == OrigF.AM.BaseGV && + F.AM.Scale == OrigF.AM.Scale && + LU.Kind) { + if (F.AM.BaseOffs == 0) + return &LU; + break; + } + } + } + } + + return 0; +} + void LSRInstance::CollectInterestingTypesAndFactors() { SmallSetVector<const SCEV *, 4> Strides; @@ -1867,6 +1993,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { if (NV == LF.OperandValToReplace) { CI->setOperand(1, CI->getOperand(0)); CI->setOperand(0, NV); + NV = CI->getOperand(1); + Changed = true; } // x == y --> x - y == 0 @@ -1901,6 +2029,9 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { DEBUG(print_fixups(dbgs())); } +/// InsertInitialFormula - Insert a formula for the given expression into +/// the given use, separating out loop-variant portions from loop-invariant +/// and loop-computable portions. void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; @@ -1909,6 +2040,8 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { assert(Inserted && "Initial formula already exists!"); (void)Inserted; } +/// InsertSupplementalFormula - Insert a simple single-register formula for +/// the given expression into the given use. void LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { @@ -2265,8 +2398,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, /// GenerateScales - Generate stride factor reuse formulae by making use of /// scaled-offset address modes, for example. -void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, - Formula Base) { +void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // Determine the integer type for the base formula. const Type *IntTy = Base.getType(); if (!IntTy) return; @@ -2312,8 +2444,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, // TODO: This could be optimized to avoid all the copying. Formula F = Base; F.ScaledReg = Quotient; - std::swap(F.BaseRegs[i], F.BaseRegs.back()); - F.BaseRegs.pop_back(); + F.DeleteBaseReg(F.BaseRegs[i]); (void)InsertFormula(LU, LUIdx, F); } } @@ -2321,8 +2452,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, } /// GenerateTruncates - Generate reuse formulae from different IV types. -void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, - Formula Base) { +void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { // This requires TargetLowering to tell us which truncates are free. if (!TLI) return; @@ -2479,7 +2609,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // TODO: Use a more targeted data structure. for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { - Formula F = LU.Formulae[L]; + const Formula &F = LU.Formulae[L]; // Use the immediate in the scaled register. 
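// (Illustrative aside, not part of the patch.) GenerateCrossUseConstantOffsets
// rests on simple address algebra: a use of register (R + C) can instead use
// R and carry C*Scale in its immediate field. A check of that identity,
// assuming no overflow:
#include <stdint.h>

static int64_t addrViaRegOffset(int64_t R, int64_t C, int64_t Scale,
                                int64_t Offs) {
  return (R + C) * Scale + Offs;          // use references register R + C
}

static int64_t addrViaImmOffset(int64_t R, int64_t C, int64_t Scale,
                                int64_t Offs) {
  return R * Scale + (Offs + C * Scale);  // same address via the immediate
}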
if (F.ScaledReg == OrigReg) {
         int64_t Offs = (uint64_t)F.AM.BaseOffs +
@@ -2527,10 +2657,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
                J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
                J != JE; ++J)
             if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
-              if (C->getValue()->getValue().isNegative() !=
-                  (NewF.AM.BaseOffs < 0) &&
-                  C->getValue()->getValue().abs()
-                    .ule(abs64(NewF.AM.BaseOffs)))
+              if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
+                    abs64(NewF.AM.BaseOffs)) &&
+                  (C->getValue()->getValue() +
+                   NewF.AM.BaseOffs).countTrailingZeros() >=
+                  CountTrailingZeros_64(NewF.AM.BaseOffs))
                 goto skip_formula;

           // Ok, looks good.
@@ -2579,7 +2710,7 @@ LSRInstance::GenerateAllReuseFormulae()
 /// by other uses, pick the best one and delete the others.
 void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
 #ifndef NDEBUG
-  bool Changed = false;
+  bool ChangedFormulae = false;
 #endif

   // Collect the best formula for each unique set of shared registers. This
@@ -2591,10 +2722,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
     LSRUse &LU = Uses[LUIdx];
     FormulaSorter Sorter(L, LU, SE, DT);
+    DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');

-    // Clear out the set of used regs; it will be recomputed.
-    LU.Regs.clear();
-
+    bool Any = false;
     for (size_t FIdx = 0, NumForms = LU.Formulae.size();
          FIdx != NumForms; ++FIdx) {
       Formula &F = LU.Formulae[FIdx];
@@ -2619,62 +2749,200 @@
         Formula &Best = LU.Formulae[P.first->second];
         if (Sorter.operator()(F, Best))
           std::swap(F, Best);
-        DEBUG(dbgs() << "Filtering out "; F.print(dbgs());
+        DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
               dbgs() << "\n"
-                      " in favor of "; Best.print(dbgs());
+                      " in favor of formula "; Best.print(dbgs());
               dbgs() << '\n');
 #ifndef NDEBUG
-        Changed = true;
+        ChangedFormulae = true;
 #endif
-        std::swap(F, LU.Formulae.back());
-        LU.Formulae.pop_back();
+        LU.DeleteFormula(F);
         --FIdx;
         --NumForms;
+        Any = true;
         continue;
       }
-      if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
-      LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
     }
+
+    // Now that we've filtered out some formulae, recompute the Regs set.
+    if (Any)
+      LU.RecomputeRegs(LUIdx, RegUses);
+
+    // Reset this to prepare for the next use.
     BestFormulae.clear();
   }

-  DEBUG(if (Changed) {
+  DEBUG(if (ChangedFormulae) {
           dbgs() << "\n"
                     "After filtering out undesirable candidates:\n";
           print_uses(dbgs());
         });
 }

+// This is a rough guess that seems to work fairly well.
+static const size_t ComplexityLimit = UINT16_MAX;
+
+/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
+/// solutions the solver might have to consider. It almost never considers
+/// this many solutions because it prunes the search space, but the pruning
+/// isn't always sufficient.
+size_t LSRInstance::EstimateSearchSpaceComplexity() const {
+  uint32_t Power = 1;
+  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+       E = Uses.end(); I != E; ++I) {
+    size_t FSize = I->Formulae.size();
+    if (FSize >= ComplexityLimit) {
+      Power = ComplexityLimit;
+      break;
+    }
+    Power *= FSize;
+    if (Power >= ComplexityLimit)
+      break;
+  }
+  return Power;
+}
+
 /// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
 /// formulae to choose from, use some rough heuristics to prune down the number
 /// of formulae.
This keeps the main solver from taking an extraordinary amount /// of time in some worst-case scenarios. void LSRInstance::NarrowSearchSpaceUsingHeuristics() { - // This is a rough guess that seems to work fairly well. - const size_t Limit = UINT16_MAX; + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); - SmallPtrSet<const SCEV *, 4> Taken; - for (;;) { - // Estimate the worst-case number of solutions we might consider. We almost - // never consider this many solutions because we prune the search space, - // but the pruning isn't always sufficient. - uint32_t Power = 1; - for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), - E = Uses.end(); I != E; ++I) { - size_t FSize = I->Formulae.size(); - if (FSize >= Limit) { - Power = Limit; - break; + DEBUG(dbgs() << "Narrowing the search space by eliminating formulae " + "which use a superset of registers used by other " + "formulae.\n"); + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + bool Any = false; + for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { + Formula &F = LU.Formulae[i]; + // Look for a formula with a constant or GV in a register. If the use + // also has a formula with that same value in an immediate field, + // delete the one that uses a register. + for (SmallVectorImpl<const SCEV *>::const_iterator + I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) { + Formula NewF = F; + NewF.AM.BaseOffs += C->getValue()->getSExtValue(); + NewF.BaseRegs.erase(NewF.BaseRegs.begin() + + (I - F.BaseRegs.begin())); + if (LU.HasFormulaWithSameRegs(NewF)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); + LU.DeleteFormula(F); + --i; + --e; + Any = true; + break; + } + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) + if (!F.AM.BaseGV) { + Formula NewF = F; + NewF.AM.BaseGV = GV; + NewF.BaseRegs.erase(NewF.BaseRegs.begin() + + (I - F.BaseRegs.begin())); + if (LU.HasFormulaWithSameRegs(NewF)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LU.DeleteFormula(F); + --i; + --e; + Any = true; + break; + } + } + } + } } - Power *= FSize; - if (Power >= Limit) - break; + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); } - if (Power < Limit) - break; + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } + + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by assuming that uses " + "separated by a constant offset will use the same " + "registers.\n"); + + // This is especially useful for unrolled loops. + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) { + if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { + if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs, + /*HasBaseReg=*/false, + LU.Kind, LU.AccessTy)) { + DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); + dbgs() << '\n'); + + LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + + // Delete formulae from the new use which are no longer legal. 
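// (Illustrative aside, not part of the patch.) Why formulae can become
// illegal at this point: once two uses merge, every surviving formula must be
// legal across the union of both offset ranges, not just its own. A toy
// legality rule, assuming a signed 9-bit displacement field purely for
// illustration:
#include <stdint.h>

static bool toyOffsetLegal(int64_t Offs) {
  return Offs >= -256 && Offs <= 255;  // assumed simm9 encoding
}

static bool toyLegalAcrossRange(int64_t BaseOffs, int64_t MinOff,
                                int64_t MaxOff) {
  // Both extremes of the merged range must still encode.
  return toyOffsetLegal(BaseOffs + MinOff) && toyOffsetLegal(BaseOffs + MaxOff);
}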
+            bool Any = false;
+            for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+              Formula &F = LUThatHas->Formulae[i];
+              if (!isLegalUse(F.AM,
+                              LUThatHas->MinOffset, LUThatHas->MaxOffset,
+                              LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+                DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+                      dbgs() << '\n');
+                LUThatHas->DeleteFormula(F);
+                --i;
+                --e;
+                Any = true;
+              }
+            }
+            if (Any)
+              LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+            // Update the relocs to reference the new use.
+            for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+                 E = Fixups.end(); I != E; ++I) {
+              LSRFixup &Fixup = *I;
+              if (Fixup.LUIdx == LUIdx) {
+                Fixup.LUIdx = LUThatHas - &Uses.front();
+                Fixup.Offset += F.AM.BaseOffs;
+                DEBUG(dbgs() << "New fixup has offset "
+                             << Fixup.Offset << '\n');
+              }
+              if (Fixup.LUIdx == NumUses-1)
+                Fixup.LUIdx = LUIdx;
+            }
+
+            // Delete the old use.
+            DeleteUse(LU);
+            --LUIdx;
+            --NumUses;
+            break;
+          }
+        }
+      }
+    }
+
+    DEBUG(dbgs() << "After pre-selection:\n";
+          print_uses(dbgs()));
+  }
+
+  // With all other options exhausted, loop until the system is simple
+  // enough to handle.
+  SmallPtrSet<const SCEV *, 4> Taken;
+  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
     // Ok, we have too many formulae on our hands to conveniently handle.
     // Use a rough heuristic to thin out the list.
+    DEBUG(dbgs() << "The search space is too complex.\n");

     // Pick the register which is used by the most LSRUses, which is likely
     // to be a good reuse register candidate.
@@ -2702,28 +2970,26 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {

     // In any use with formulae which reference this register, delete formulae
     // which don't reference it.
-    for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(),
-         E = Uses.end(); I != E; ++I) {
-      LSRUse &LU = *I;
+    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+      LSRUse &LU = Uses[LUIdx];
       if (!LU.Regs.count(Best)) continue;

-      // Clear out the set of used regs; it will be recomputed.
-      LU.Regs.clear();
-
+      bool Any = false;
       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
         Formula &F = LU.Formulae[i];
         if (!F.referencesReg(Best)) {
           DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
-          std::swap(LU.Formulae.back(), F);
-          LU.Formulae.pop_back();
+          LU.DeleteFormula(F);
           --e;
           --i;
+          Any = true;
+          assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
           continue;
         }
-
-        if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
-        LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
       }
+
+      if (Any)
+        LU.RecomputeRegs(LUIdx, RegUses);
     }

     DEBUG(dbgs() << "After pre-selection:\n";
@@ -2810,11 +3076,14 @@ retry:
   // If none of the formulae had all of the required registers, relax the
   // constraint so that we don't exclude all formulae.
   if (!AnySatisfiedReqRegs) {
+    assert(!ReqRegs.empty() && "Solver failed even without required registers");
     ReqRegs.clear();
     goto retry;
   }
 }

+/// Solve - Choose one formula from each use. Return the results in the given
+/// Solution vector.
 void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
   SmallVector<const Formula *, 8> Workspace;
   Cost SolutionCost;
@@ -2824,6 +3093,7 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
   DenseSet<const SCEV *> VisitedRegs;
   Workspace.reserve(Uses.size());

+  // SolveRecurse does all the work.
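// (Illustrative aside, not part of the patch.) SolveRecurse is in essence a
// branch-and-bound search: pick one formula per use, and abandon any partial
// assignment whose cost already reaches the best complete solution. A minimal
// sketch with scalar costs; the real Cost class also tracks register sets,
// and nonnegative per-choice costs are assumed so the pruning is sound:
#include <cstddef>
#include <vector>

static void solveRec(const std::vector<std::vector<int> > &CostOf,
                     size_t Use, int CurCost, std::vector<size_t> &Pick,
                     int &BestCost, std::vector<size_t> &Best) {
  if (Use == CostOf.size()) {
    if (CurCost < BestCost) { BestCost = CurCost; Best = Pick; }
    return;
  }
  for (size_t i = 0, e = CostOf[Use].size(); i != e; ++i) {
    int Next = CurCost + CostOf[Use][i];
    if (Next >= BestCost)
      continue;  // prune: this branch cannot beat the incumbent
    Pick.push_back(i);
    solveRec(CostOf, Use + 1, Next, Pick, BestCost, Best);
    Pick.pop_back();
  }
}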
SolveRecurse(Solution, SolutionCost, Workspace, CurCost, CurRegs, VisitedRegs); @@ -2839,17 +3109,8 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { Solution[i]->print(dbgs()); dbgs() << '\n'; }); -} -/// getImmediateDominator - A handy utility for the specific DominatorTree -/// query that we need here. -/// -static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) { - DomTreeNode *Node = DT.getNode(BB); - if (!Node) return 0; - Node = Node->getIDom(); - if (!Node) return 0; - return Node->getBlock(); + assert(Solution.size() == Uses.size() && "Malformed solution!"); } /// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up @@ -2865,9 +3126,11 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; BasicBlock *IDom; - for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) { - IDom = getImmediateDominator(Rung, DT); - if (!IDom) return IP; + for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { + if (!Rung) return IP; + Rung = Rung->getIDom(); + if (!Rung) return IP; + IDom = Rung->getBlock(); // Don't climb into a loop though. const Loop *IDomLoop = LI.getLoopFor(IDom); @@ -2891,7 +3154,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && (!BetterPos || DT.dominates(BetterPos, Inst))) - BetterPos = next(BasicBlock::iterator(Inst)); + BetterPos = llvm::next(BasicBlock::iterator(Inst)); } if (!AllDominate) break; @@ -2957,6 +3220,8 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, return IP; } +/// Expand - Emit instructions for the leading candidate expression for this +/// LSRUse (this is called "expanding"). Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, @@ -3212,6 +3477,8 @@ void LSRInstance::Rewrite(const LSRFixup &LF, DeadInsts.push_back(LF.OperandValToReplace); } +/// ImplementSolution - Rewrite all the fixup locations with new values, +/// following the chosen solution. void LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Pass *P) { @@ -3224,10 +3491,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Expand the new value definitions and update the users. - for (size_t i = 0, e = Fixups.size(); i != e; ++i) { - size_t LUIdx = Fixups[i].LUIdx; + for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + const LSRFixup &Fixup = *I; - Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P); + Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P); Changed = true; } @@ -3256,13 +3524,11 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); dbgs() << ":\n"); - /// OptimizeShadowIV - If IV is used in a int-to-float cast - /// inside the loop then try to eliminate the cast operation. + // First, perform some low-level loop optimizations. OptimizeShadowIV(); + OptimizeLoopTermCond(); - // Change loop terminating condition to use the postinc iv when possible. - Changed |= OptimizeLoopTermCond(); - + // Start collecting data and preparing for the solver. 
CollectInterestingTypesAndFactors(); CollectFixupsAndInitialFormulae(); CollectLoopInvariantFixupsAndFormulae(); @@ -3283,7 +3549,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) SmallVector<const Formula *, 8> Solution; Solve(Solution); - assert(Solution.size() == Uses.size() && "Malformed solution!"); // Release memory that is no longer needed. Factors.clear(); @@ -3333,9 +3598,8 @@ void LSRInstance::print_fixups(raw_ostream &OS) const { OS << "LSR is examining the following fixup sites:\n"; for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), E = Fixups.end(); I != E; ++I) { - const LSRFixup &LF = *I; dbgs() << " "; - LF.print(OS); + I->print(OS); OS << '\n'; } } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index b621e8d..9744100 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -58,13 +58,20 @@ FunctionPass *llvm::createCFGSimplificationPass() { /// ChangeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I) { +static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) (*SI)->removePredecessor(BB); + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst::Create(TrapFn, "", I); + } new UnreachableInst(I->getContext(), I); // All instructions after this are dead. @@ -118,7 +125,8 @@ static bool MarkAliveBlocks(BasicBlock *BB, // though. ++BBI; if (!isa<UnreachableInst>(BBI)) { - ChangeToUnreachable(BBI); + // Don't insert a call to llvm.trap right before the unreachable. 
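// (Illustrative aside, not part of the patch.) For the callers that do pass
// UseLLVMTrap=true, the inserted llvm.trap lowers to an abort-class
// instruction on most targets (ud2 on x86, for example), so a store through
// null or undef fails loudly instead of running into unrelated code. The C
// analogue of the transformed block:
static void poisonedPath(void) {
  __builtin_trap();  // clang/gcc builtin; never returns
  // Everything past this point is unreachable, matching the new
  // UnreachableInst that follows the trap call.
}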
+ ChangeToUnreachable(BBI, false); Changed = true; } break; @@ -134,7 +142,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI); + ChangeToUnreachable(SI, true); Changed = true; break; } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index b053cfc..7414be7 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -558,10 +558,13 @@ struct MemCmpOpt : public LibCallOptimization { if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); - if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS - Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv"); - Value *RHSV = B.CreateLoad(CastToCStr(RHS, B), "rhsv"); - return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType()); + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); } // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp new file mode 100644 index 0000000..b88ba48 --- /dev/null +++ b/lib/Transforms/Scalar/Sink.cpp @@ -0,0 +1,267 @@ +//===-- Sink.cpp - Code Sinking -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions into successor blocks, when possible, so that +// they aren't executed on paths where their results aren't needed. 
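// (Illustrative aside, not part of the patch.) The effect of sinking at the
// source level: a computation used on only one side of a branch migrates into
// that successor, so the other path never pays for it. useValue is a
// hypothetical consumer, declared only for illustration:
static int useValue(int);

static int beforeSinking(int a, int b, bool cond) {
  int t = a * b;              // computed even when cond is false
  return cond ? useValue(t) : 0;
}

static int afterSinking(int a, int b, bool cond) {
  if (cond) {
    int t = a * b;            // sunk: only the path that needs t computes it
    return useValue(t);
  }
  return 0;
}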
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sink" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumSunk, "Number of instructions sunk"); + +namespace { + class Sinking : public FunctionPass { + DominatorTree *DT; + LoopInfo *LI; + AliasAnalysis *AA; + + public: + static char ID; // Pass identification + Sinking() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<DominatorTree>(); + AU.addPreserved<LoopInfo>(); + } + private: + bool ProcessBlock(BasicBlock &BB); + bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores); + bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const; + }; +} // end anonymous namespace + +char Sinking::ID = 0; +static RegisterPass<Sinking> +X("sink", "Code sinking"); + +FunctionPass *llvm::createSinkingPass() { return new Sinking(); } + +/// AllUsesDominatedByBlock - Return true if all uses of the specified value +/// occur in blocks dominated by the specified block. +bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, + BasicBlock *BB) const { + // Ignoring debug uses is necessary so debug info doesn't affect the code. + // This may leave a referencing dbg_value in the original block, before + // the definition of the vreg. Dwarf generator handles this although the + // user might not get the right info at runtime. + for (Value::use_iterator I = Inst->use_begin(), + E = Inst->use_end(); I != E; ++I) { + // Determine the block of the use. + Instruction *UseInst = cast<Instruction>(*I); + BasicBlock *UseBlock = UseInst->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(UseInst)) { + // PHI nodes use the operand in the predecessor block, not the block with + // the PHI. + unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo()); + UseBlock = PN->getIncomingBlock(Num); + } + // Check that it dominates. + if (!DT->dominates(BB, UseBlock)) + return false; + } + return true; +} + +bool Sinking::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + AA = &getAnalysis<AliasAnalysis>(); + + bool EverMadeChange = false; + + while (1) { + bool MadeChange = false; + + // Process all basic blocks. + for (Function::iterator I = F.begin(), E = F.end(); + I != E; ++I) + MadeChange |= ProcessBlock(*I); + + // If this iteration over the code changed anything, keep iterating. + if (!MadeChange) break; + EverMadeChange = true; + } + return EverMadeChange; +} + +bool Sinking::ProcessBlock(BasicBlock &BB) { + // Can't sink anything out of a block that has less than two successors. + if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false; + + // Don't bother sinking code out of unreachable blocks. In addition to being + // unprofitable, it can also lead to infinite looping, because in an unreachable + // loop there may be nowhere to stop. 
+  if (!DT->isReachableFromEntry(&BB)) return false;
+
+  bool MadeChange = false;
+
+  // Walk the basic block bottom-up. Remember if we saw a store.
+  BasicBlock::iterator I = BB.end();
+  --I;
+  bool ProcessedBegin = false;
+  SmallPtrSet<Instruction *, 8> Stores;
+  do {
+    Instruction *Inst = I; // The instruction to sink.
+
+    // Predecrement I (if it's not begin) so that it isn't invalidated by
+    // sinking.
+    ProcessedBegin = I == BB.begin();
+    if (!ProcessedBegin)
+      --I;
+
+    if (isa<DbgInfoIntrinsic>(Inst))
+      continue;
+
+    if (SinkInstruction(Inst, Stores))
+      ++NumSunk, MadeChange = true;
+
+    // If we just processed the first instruction in the block, we're done.
+  } while (!ProcessedBegin);
+
+  return MadeChange;
+}
+
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
+                         SmallPtrSet<Instruction *, 8> &Stores) {
+  if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
+    if (L->isVolatile()) return false;
+
+    Value *Ptr = L->getPointerOperand();
+    unsigned Size = AA->getTypeStoreSize(L->getType());
+    for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
+         E = Stores.end(); I != E; ++I)
+      if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)
+        return false;
+  }
+
+  if (Inst->mayWriteToMemory()) {
+    Stores.insert(Inst);
+    return false;
+  }
+
+  return Inst->isSafeToSpeculativelyExecute();
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified
+/// instruction out of its current block into a successor.
+bool Sinking::SinkInstruction(Instruction *Inst,
+                              SmallPtrSet<Instruction *, 8> &Stores) {
+  // Check if it's safe to move the instruction.
+  if (!isSafeToMove(Inst, AA, Stores))
+    return false;
+
+  // FIXME: This should include support for sinking instructions within the
+  // block they are currently in to shorten the live ranges. We often get
+  // instructions sunk into the top of a large block, but it would be better to
+  // also sink them down before their first use in the block. This xform has to
+  // be careful not to *increase* register pressure though, e.g. sinking
+  // "x = y + z" down if it kills y and z would increase the live ranges of y
+  // and z and only shrink the live range of x.
+
+  // Loop over all the operands of the specified instruction. If there is
+  // anything we can't handle, bail out.
+  BasicBlock *ParentBlock = Inst->getParent();
+
+  // SuccToSinkTo - This is the successor to sink this instruction to, once we
+  // decide.
+  BasicBlock *SuccToSinkTo = 0;
+
+  // FIXME: This picks a successor to sink into based on having one
+  // successor that dominates all the uses. However, there are cases where
+  // sinking can happen but where the sink point isn't a successor. For
+  // example:
+  //   x = computation
+  //   if () {} else {}
+  //   use x
+  // the instruction could be sunk over the whole diamond for the
+  // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+  // after that.
+
+  // Instructions can only be sunk if all their uses are in blocks
+  // dominated by one of the successors.
+  // Look at all the successors and decide which one
+  // we should sink to.
+  for (succ_iterator SI = succ_begin(ParentBlock),
+       E = succ_end(ParentBlock); SI != E; ++SI) {
+    if (AllUsesDominatedByBlock(Inst, *SI)) {
+      SuccToSinkTo = *SI;
+      break;
+    }
+  }
+
+  // If we couldn't find a block to sink to, ignore this instruction.
+  if (SuccToSinkTo == 0)
+    return false;
+
+  // It is not possible to sink an instruction into its own block. This can
+  // happen with loops.
+  if (Inst->getParent() == SuccToSinkTo)
+    return false;
+
+  DEBUG(dbgs() << "Sink instr " << *Inst);
+  DEBUG(dbgs() << "to block ";
+        WriteAsOperand(dbgs(), SuccToSinkTo, false));
+
+  // If the block has multiple predecessors, this would introduce computation on
+  // a path where it doesn't already exist. We could split the critical edge,
+  // but for now we just punt.
+  // FIXME: Split critical edges if not backedges.
+  if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
+    // We cannot sink a load across a critical edge - there may be stores in
+    // other code paths.
+    if (!Inst->isSafeToSpeculativelyExecute()) {
+      DEBUG(dbgs() << " *** PUNTING: Won't sink load along critical edge.\n");
+      return false;
+    }
+
+    // We don't want to sink across a critical edge if we don't dominate the
+    // successor. We could be introducing calculations to new code paths.
+    if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+      return false;
+    }
+
+    // Don't sink instructions into a loop.
+    if (LI->isLoopHeader(SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
+      return false;
+    }
+
+    // Otherwise we are OK with sinking along a critical edge.
+    DEBUG(dbgs() << "Sinking along critical edge.\n");
+  }
+
+  // Determine where to insert into. Skip phi nodes.
+  BasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+  while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos))
+    ++InsertPos;
+
+  // Move the instruction.
+  Inst->moveBefore(InsertPos);
+  return true;
+}
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 8ad66dd..6d4fe4b 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -344,7 +344,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
   DILocation OrigLocation = ILoc.getOrigLocation();
   MDNode *NewLoc = TheCallMD;
   if (OrigLocation.Verify())
-    NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD);
+    NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD);

   Value *MDVs[] = {
     InsnMD->getOperand(0), // Line
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index f181f3a..13f0a28 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -861,7 +861,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
     // Find the nearest store that has a lower index than this load.
     StoresByIndexTy::iterator I =
       std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
-                       std::pair<unsigned, StoreInst*>(LoadIdx, 0),
+                       std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
                        StoreIndexSearchPredicate());

     // If there is no store before this load, then we can't promote this load.
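// (Illustrative aside, not part of the patch.) The promotion code above keeps
// (instruction index, store) pairs sorted by index and binary-searches them;
// the static_cast exists so the sentinel pair's second element has an
// unambiguous type. A self-contained analogue of the search:
#include <algorithm>
#include <utility>
#include <vector>

typedef std::pair<unsigned, const char *> IdxEntry;  // (index, payload)

static const char *nearestBefore(const std::vector<IdxEntry> &Sorted,
                                 unsigned Idx) {
  std::vector<IdxEntry>::const_iterator I =
    std::lower_bound(Sorted.begin(), Sorted.end(),
                     IdxEntry(Idx, static_cast<const char *>(0)));
  if (I == Sorted.begin())
    return 0;          // nothing recorded before this index
  return (--I)->second;
}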
@@ -886,7 +886,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI) { DIVariable DIVar(DDI->getVariable()); - if (!DIVar.getNode()) + if (!DIVar.Verify()) return; if (!DIF) diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 25d50db..f4bdb527 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/AlignOf.h" @@ -20,40 +19,17 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; -/// BBInfo - Per-basic block information used internally by SSAUpdater. -/// The predecessors of each block are cached here since pred_iterator is -/// slow and we need to iterate over the blocks at least a few times. -class SSAUpdater::BBInfo { -public: - BasicBlock *BB; // Back-pointer to the corresponding block. - Value *AvailableVal; // Value to use in this block. - BBInfo *DefBB; // Block that defines the available value. - int BlkNum; // Postorder number. - BBInfo *IDom; // Immediate dominator. - unsigned NumPreds; // Number of predecessor blocks. - BBInfo **Preds; // Array[NumPreds] of predecessor blocks. - PHINode *PHITag; // Marker for existing PHIs that match. - - BBInfo(BasicBlock *ThisBB, Value *V) - : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0), - NumPreds(0), Preds(0), PHITag(0) { } -}; - -typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy; - typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static BBMapTy *getBBMap(void *BM) { - return static_cast<BBMapTy*>(BM); -} - SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), BM(0), InsertedPHIs(NewPHI) {} + : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); @@ -105,9 +81,7 @@ static bool IsEquivalentPHI(PHINode *PHI, /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { - assert(BM == 0 && "Unexpected Internal State"); Value *Res = GetValueAtEndOfBlockInternal(BB); - assert(BM == 0 && "Unexpected Internal State"); return Res; } @@ -231,427 +205,126 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } -/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry -/// for the specified BB and if so, return it. If not, construct SSA form by -/// first calculating the required placement of PHIs and then inserting new -/// PHIs where needed. -Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - if (Value *V = AvailableVals[BB]) - return V; - - // Pool allocation used internally by GetValueAtEndOfBlock. - BumpPtrAllocator Allocator; - BBMapTy BBMapObj; - BM = &BBMapObj; - - SmallVector<BBInfo*, 100> BlockList; - BuildBlockList(BB, &BlockList, &Allocator); - - // Special case: bail out if BB is unreachable. 
- if (BlockList.size() == 0) { - BM = 0; - return UndefValue::get(PrototypeValue->getType()); - } - - FindDominators(&BlockList); - FindPHIPlacement(&BlockList); - FindAvailableVals(&BlockList); - - BM = 0; - return BBMapObj[BB]->DefBB->AvailableVal; +/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator +/// in the SSAUpdaterImpl template. +namespace { + class PHIiter { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHIiter(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHIiter(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHIiter &operator++() { ++idx; return *this; } + bool operator==(const PHIiter& x) const { return idx == x.idx; } + bool operator!=(const PHIiter& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; } -/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds -/// vector, set Info->NumPreds, and allocate space in Info->Preds. -static void FindPredecessorBlocks(SSAUpdater::BBInfo *Info, - SmallVectorImpl<BasicBlock*> *Preds, - BumpPtrAllocator *Allocator) { - // We can get our predecessor info by walking the pred_iterator list, - // but it is relatively slow. If we already have PHI nodes in this - // block, walk one of them to get the predecessor list instead. - BasicBlock *BB = Info->BB; - if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) - Preds->push_back(SomePhi->getIncomingBlock(PI)); - } else { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Preds->push_back(*PI); +/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template, +/// specialized for SSAUpdater. +namespace llvm { +template<> +class SSAUpdaterTraits<SSAUpdater> { +public: + typedef BasicBlock BlkT; + typedef Value *ValT; + typedef PHINode PhiT; + + typedef succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } + + typedef PHIiter PHI_iterator; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); } - Info->NumPreds = Preds->size(); - Info->Preds = static_cast<SSAUpdater::BBInfo**> - (Allocator->Allocate(Info->NumPreds * sizeof(SSAUpdater::BBInfo*), - AlignOf<SSAUpdater::BBInfo*>::Alignment)); -} - -/// BuildBlockList - Starting from the specified basic block, traverse back -/// through its predecessors until reaching blocks with known values. Create -/// BBInfo structures for the blocks and append them to the block list. -void SSAUpdater::BuildBlockList(BasicBlock *BB, BlockListTy *BlockList, - BumpPtrAllocator *Allocator) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - BBMapTy *BBMap = getBBMap(BM); - SmallVector<BBInfo*, 10> RootList; - SmallVector<BBInfo*, 64> WorkList; - - BBInfo *Info = new (*Allocator) BBInfo(BB, 0); - (*BBMap)[BB] = Info; - WorkList.push_back(Info); - - // Search backward from BB, creating BBInfos along the way and stopping when - // reaching blocks that define the value. Record those defining blocks on - // the RootList. 
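// (Illustrative aside, not part of the patch.) Both the code being deleted
// here and its SSAUpdaterImpl replacement begin the same way: a backward
// worklist search from the query block that stops at blocks already carrying
// a definition. The generic shape, with caller-supplied hooks:
#include <set>
#include <vector>

template <typename BlockT, typename PredsFn, typename HasDefFn>
static void backwardSearch(BlockT Start, PredsFn preds, HasDefFn hasDef,
                           std::vector<BlockT> &Roots) {
  std::set<BlockT> Seen;
  std::vector<BlockT> Work(1, Start);
  Seen.insert(Start);
  while (!Work.empty()) {
    BlockT B = Work.back();
    Work.pop_back();
    if (hasDef(B)) {        // a known definition ends this branch of the walk
      Roots.push_back(B);
      continue;
    }
    std::vector<BlockT> Ps = preds(B);
    for (size_t i = 0; i != Ps.size(); ++i)
      if (Seen.insert(Ps[i]).second)  // true when newly visited
        Work.push_back(Ps[i]);
  }
}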
- SmallVector<BasicBlock*, 10> Preds; - while (!WorkList.empty()) { - Info = WorkList.pop_back_val(); - Preds.clear(); - FindPredecessorBlocks(Info, &Preds, Allocator); - - // Treat an unreachable predecessor as a definition with 'undef'. - if (Info->NumPreds == 0) { - Info->AvailableVal = UndefValue::get(PrototypeValue->getType()); - Info->DefBB = Info; - RootList.push_back(Info); - continue; - } - - for (unsigned p = 0; p != Info->NumPreds; ++p) { - BasicBlock *Pred = Preds[p]; - // Check if BBMap already has a BBInfo for the predecessor block. - BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); - if (BBMapBucket.second) { - Info->Preds[p] = BBMapBucket.second; - continue; - } - - // Create a new BBInfo for the predecessor. - Value *PredVal = AvailableVals.lookup(Pred); - BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); - BBMapBucket.second = PredInfo; - Info->Preds[p] = PredInfo; - - if (PredInfo->AvailableVal) { - RootList.push_back(PredInfo); - continue; - } - WorkList.push_back(PredInfo); + /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds + /// vector, set Info->NumPreds, and allocate space in Info->Preds. + static void FindPredecessorBlocks(BasicBlock *BB, + SmallVectorImpl<BasicBlock*> *Preds) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) + Preds->push_back(SomePhi->getIncomingBlock(PI)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Preds->push_back(*PI); } } - // Now that we know what blocks are backwards-reachable from the starting - // block, do a forward depth-first traversal to assign postorder numbers - // to those blocks. - BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); - unsigned BlkNum = 1; - - // Initialize the worklist with the roots from the backward traversal. - while (!RootList.empty()) { - Info = RootList.pop_back_val(); - Info->IDom = PseudoEntry; - Info->BlkNum = -1; - WorkList.push_back(Info); + /// GetUndefVal - Get an undefined value of the same type as the value + /// being handled. + static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { + return UndefValue::get(Updater->PrototypeValue->getType()); } - while (!WorkList.empty()) { - Info = WorkList.back(); - - if (Info->BlkNum == -2) { - // All the successors have been handled; assign the postorder number. - Info->BlkNum = BlkNum++; - // If not a root, put it on the BlockList. - if (!Info->AvailableVal) - BlockList->push_back(Info); - WorkList.pop_back(); - continue; - } - - // Leave this entry on the worklist, but set its BlkNum to mark that its - // successors have been put on the worklist. When it returns to the top - // the list, after handling its successors, it will be assigned a number. - Info->BlkNum = -2; - - // Add unvisited successors to the work list. - for (succ_iterator SI = succ_begin(Info->BB), E = succ_end(Info->BB); - SI != E; ++SI) { - BBInfo *SuccInfo = (*BBMap)[*SI]; - if (!SuccInfo || SuccInfo->BlkNum) - continue; - SuccInfo->BlkNum = -1; - WorkList.push_back(SuccInfo); - } + /// CreateEmptyPHI - Create a new PHI instruction in the specified block. + /// Reserve space for the operands but do not fill them in yet. 
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, + SSAUpdater *Updater) { + PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(), + Updater->PrototypeValue->getName(), + &BB->front()); + PHI->reserveOperandSpace(NumPreds); + return PHI; } - PseudoEntry->BlkNum = BlkNum; -} -/// IntersectDominators - This is the dataflow lattice "meet" operation for -/// finding dominators. Given two basic blocks, it walks up the dominator -/// tree until it finds a common dominator of both. It uses the postorder -/// number of the blocks to determine how to do that. -static SSAUpdater::BBInfo *IntersectDominators(SSAUpdater::BBInfo *Blk1, - SSAUpdater::BBInfo *Blk2) { - while (Blk1 != Blk2) { - while (Blk1->BlkNum < Blk2->BlkNum) { - Blk1 = Blk1->IDom; - if (!Blk1) - return Blk2; - } - while (Blk2->BlkNum < Blk1->BlkNum) { - Blk2 = Blk2->IDom; - if (!Blk2) - return Blk1; - } + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) { + PHI->addIncoming(Val, Pred); } - return Blk1; -} -/// FindDominators - Calculate the dominator tree for the subset of the CFG -/// corresponding to the basic blocks on the BlockList. This uses the -/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and -/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. -/// Because the CFG subset does not include any edges leading into blocks that -/// define the value, the results are not the usual dominator tree. The CFG -/// subset has a single pseudo-entry node with edges to a set of root nodes -/// for blocks that define the value. The dominators for this subset CFG are -/// not the standard dominators but they are adequate for placing PHIs within -/// the subset CFG. -void SSAUpdater::FindDominators(BlockListTy *BlockList) { - bool Changed; - do { - Changed = false; - // Iterate over the list in reverse order, i.e., forward on CFG edges. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - // Start with the first predecessor. - assert(Info->NumPreds > 0 && "unreachable block"); - BBInfo *NewIDom = Info->Preds[0]; - - // Iterate through the block's other predecessors. - for (unsigned p = 1; p != Info->NumPreds; ++p) { - BBInfo *Pred = Info->Preds[p]; - NewIDom = IntersectDominators(NewIDom, Pred); - } - - // Check if the IDom value has changed. - if (NewIDom != Info->IDom) { - Info->IDom = NewIDom; - Changed = true; - } - } - } while (Changed); -} - -/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for -/// any blocks containing definitions of the value. If one is found, then the -/// successor of Pred is in the dominance frontier for the definition, and -/// this function returns true. -static bool IsDefInDomFrontier(const SSAUpdater::BBInfo *Pred, - const SSAUpdater::BBInfo *IDom) { - for (; Pred != IDom; Pred = Pred->IDom) { - if (Pred->DefBB == Pred) - return true; + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static PHINode *InstrIsPHI(Instruction *I) { + return dyn_cast<PHINode>(I); } - return false; -} - -/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of -/// the known definitions. Iteratively add PHIs in the dom frontiers until -/// nothing changes. Along the way, keep track of the nearest dominating -/// definitions for non-PHI blocks. 
-void SSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { - bool Changed; - do { - Changed = false; - // Iterate over the list in reverse order, i.e., forward on CFG edges. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - // If this block already needs a PHI, there is nothing to do here. - if (Info->DefBB == Info) - continue; - - // Default to use the same def as the immediate dominator. - BBInfo *NewDefBB = Info->IDom->DefBB; - for (unsigned p = 0; p != Info->NumPreds; ++p) { - if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { - // Need a PHI here. - NewDefBB = Info; - break; - } - } - - // Check if anything changed. - if (NewDefBB != Info->DefBB) { - Info->DefBB = NewDefBB; - Changed = true; - } - } - } while (Changed); -} - -/// FindAvailableVal - If this block requires a PHI, first check if an existing -/// PHI matches the PHI placement and reaching definitions computed earlier, -/// and if not, create a new PHI. Visit all the block's predecessors to -/// calculate the available value for each one and fill in the incoming values -/// for a new PHI. -void SSAUpdater::FindAvailableVals(BlockListTy *BlockList) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - // Go through the worklist in forward order (i.e., backward through the CFG) - // and check if existing PHIs can be used. If not, create empty PHIs where - // they are needed. - for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); - I != E; ++I) { - BBInfo *Info = *I; - // Check if there needs to be a PHI in BB. - if (Info->DefBB != Info) - continue; - - // Look for an existing PHI. - FindExistingPHI(Info->BB, BlockList); - if (Info->AvailableVal) - continue; - - PHINode *PHI = PHINode::Create(PrototypeValue->getType(), - PrototypeValue->getName(), - &Info->BB->front()); - PHI->reserveOperandSpace(Info->NumPreds); - Info->AvailableVal = PHI; - AvailableVals[Info->BB] = PHI; + /// ValueIsPHI - Check if a value is a PHI. + /// + static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { + return dyn_cast<PHINode>(Val); } - // Now go back through the worklist in reverse order to fill in the arguments - // for any new PHIs added in the forward traversal. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - if (Info->DefBB != Info) { - // Record the available value at join nodes to speed up subsequent - // uses of this SSAUpdater for the same value. - if (Info->NumPreds > 1) - AvailableVals[Info->BB] = Info->DefBB->AvailableVal; - continue; - } - - // Check if this block contains a newly added PHI. - PHINode *PHI = dyn_cast<PHINode>(Info->AvailableVal); - if (!PHI || PHI->getNumIncomingValues() == Info->NumPreds) - continue; - - // Iterate through the block's predecessors. - for (unsigned p = 0; p != Info->NumPreds; ++p) { - BBInfo *PredInfo = Info->Preds[p]; - BasicBlock *Pred = PredInfo->BB; - // Skip to the nearest preceding definition. - if (PredInfo->DefBB != PredInfo) - PredInfo = PredInfo->DefBB; - PHI->addIncoming(PredInfo->AvailableVal, Pred); - } - - DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n"); - - // If the client wants to know about all new instructions, tell it. - if (InsertedPHIs) InsertedPHIs->push_back(PHI); + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. 
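// (Illustrative aside, not part of the patch.) The payoff of the
// SSAUpdaterTraits specialization being assembled here is that the shared
// engine never names BasicBlock or PHINode directly; it is written purely
// against traits hooks, so another client (e.g. a machine-level updater) only
// has to supply its own specialization. The pattern in miniature:
template <typename T> struct UpdaterTraits;  // clients specialize this

struct ToyUpdater { int DefaultVal; };

template <> struct UpdaterTraits<ToyUpdater> {
  typedef int ValT;
  static ValT GetUndefVal(ToyUpdater *U) { return U->DefaultVal; }
};

// Generic engine: compiles against whichever specialization matches T.
template <typename T>
static typename UpdaterTraits<T>::ValT runEngine(T *Updater) {
  return UpdaterTraits<T>::GetUndefVal(Updater);
}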
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) { + PHINode *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumIncomingValues() == 0) + return PHI; + return 0; } -} -/// FindExistingPHI - Look through the PHI nodes in a block to see if any of -/// them match what is needed. -void SSAUpdater::FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList) { - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast<PHINode>(It)); ++It) { - if (CheckIfPHIMatches(SomePHI)) { - RecordMatchingPHI(SomePHI); - break; - } - // Match failed: clear all the PHITag values. - for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); - I != E; ++I) - (*I)->PHITag = 0; + /// GetPHIValue - For the specified PHI instruction, return the value + /// that it defines. + static Value *GetPHIValue(PHINode *PHI) { + return PHI; } -} +}; -/// CheckIfPHIMatches - Check if a PHI node matches the placement and values -/// in the BBMap. -bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) { - BBMapTy *BBMap = getBBMap(BM); - SmallVector<PHINode*, 20> WorkList; - WorkList.push_back(PHI); - - // Mark that the block containing this PHI has been visited. - (*BBMap)[PHI->getParent()]->PHITag = PHI; - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - Value *IncomingVal = PHI->getIncomingValue(i); - BBInfo *PredInfo = (*BBMap)[PHI->getIncomingBlock(i)]; - // Skip to the nearest preceding definition. - if (PredInfo->DefBB != PredInfo) - PredInfo = PredInfo->DefBB; - - // Check if it matches the expected value. - if (PredInfo->AvailableVal) { - if (IncomingVal == PredInfo->AvailableVal) - continue; - return false; - } - - // Check if the value is a PHI in the correct block. - PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal); - if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB) - return false; - - // If this block has already been visited, check if this PHI matches. - if (PredInfo->PHITag) { - if (IncomingPHIVal == PredInfo->PHITag) - continue; - return false; - } - PredInfo->PHITag = IncomingPHIVal; - - WorkList.push_back(IncomingPHIVal); - } - } - return true; -} +} // End llvm namespace -/// RecordMatchingPHI - For a PHI node that matches, record it and its input -/// PHIs in both the BBMap and the AvailableVals mapping. -void SSAUpdater::RecordMatchingPHI(PHINode *PHI) { - BBMapTy *BBMap = getBBMap(BM); +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); - SmallVector<PHINode*, 20> WorkList; - WorkList.push_back(PHI); - - // Record this PHI. - BasicBlock *BB = PHI->getParent(); - AvailableVals[BB] = PHI; - (*BBMap)[BB]->AvailableVal = PHI; - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); - if (!IncomingPHIVal) continue; - BB = IncomingPHIVal->getParent(); - BBInfo *Info = (*BBMap)[BB]; - if (!Info || Info->AvailableVal) - continue; - - // Record the PHI and add it to the worklist. 
- AvailableVals[BB] = IncomingPHIVal; - Info->AvailableVal = IncomingPHIVal; - WorkList.push_back(IncomingPHIVal); - } - } + if (Value *V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 6c1aa5e..e48c026 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -677,11 +677,16 @@ void SlotTracker::processFunction() { if (!I->getType()->isVoidTy() && !I->hasName()) CreateFunctionSlot(I); - // Intrinsics can directly use metadata. - if (isa<IntrinsicInst>(I)) - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i))) - CreateMetadataSlot(N); + // Intrinsics can directly use metadata. We allow direct calls to any + // llvm.foo function here, because the target may not be linked into the + // optimizer. + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + if (Function *F = CI->getCalledFunction()) + if (F->getName().startswith("llvm.")) + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i))) + CreateMetadataSlot(N); + } // Process metadata attached with this instruction. I->getAllMetadata(MDForInst); @@ -1568,6 +1573,7 @@ void AssemblyWriter::printFunction(const Function *F) { case CallingConv::Cold: Out << "coldcc "; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break; + case CallingConv::X86_ThisCall: Out << "x86_thiscallcc "; break; case CallingConv::ARM_APCS: Out << "arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break; @@ -1840,6 +1846,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { case CallingConv::Cold: Out << " coldcc"; break; case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break; case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; @@ -1892,6 +1899,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { case CallingConv::Cold: Out << " coldcc"; break; case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break; case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; @@ -2024,9 +2032,9 @@ static void WriteMDNodeComment(const MDNode *Node, return; ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0)); if (!CI) return; - unsigned Val = CI->getZExtValue(); - unsigned Tag = Val & ~LLVMDebugVersionMask; - if (Val < LLVMDebugVersion) + APInt Val = CI->getValue(); + APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask); + if (Val.ult(LLVMDebugVersion)) return; Out.PadToColumn(50); @@ -2040,8 +2048,10 @@ static void WriteMDNodeComment(const MDNode *Node, Out << "; [ DW_TAG_vector_type ]"; else if (Tag == dwarf::DW_TAG_user_base) Out << "; [ DW_TAG_user_base ]"; - else if (const char *TagName = dwarf::TagString(Tag)) - Out << "; [ " << TagName 
<< " ]"; + else if (Tag.isIntN(32)) { + if (const char *TagName = dwarf::TagString(Tag.getZExtValue())) + Out << "; [ " << TagName << " ]"; + } } void AssemblyWriter::writeAllMDNodes() { diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index b28fdeb..a56938c 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -275,7 +275,7 @@ public: addImmutablePass(IP); recordAvailableAnalysis(IP); } else { - P->assignPassManager(activeStack); + P->assignPassManager(activeStack, PMT_FunctionPassManager); } } @@ -418,7 +418,7 @@ public: addImmutablePass(IP); recordAvailableAnalysis(IP); } else { - P->assignPassManager(activeStack); + P->assignPassManager(activeStack, PMT_ModulePassManager); } } @@ -1270,20 +1270,30 @@ FunctionPassManager::~FunctionPassManager() { delete FPM; } +/// addImpl - Add a pass to the queue of passes to run, without +/// checking whether to add a printer pass. +void FunctionPassManager::addImpl(Pass *P) { + FPM->add(P); +} + /// add - Add a pass to the queue of passes to run. This passes /// ownership of the Pass to the PassManager. When the /// PassManager_X is destroyed, the pass will be destroyed as well, so /// there is no need to delete the pass. (TODO delete passes.) /// This implies that all passes MUST be allocated with 'new'. void FunctionPassManager::add(Pass *P) { - if (ShouldPrintBeforePass(P)) - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") - + P->getPassName() + " ***")); - FPM->add(P); + // If this is a not a function pass, don't add a printer for it. + if (P->getPassKind() == PT_Function) + if (ShouldPrintBeforePass(P)) + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") + + P->getPassName() + " ***")); - if (ShouldPrintAfterPass(P)) - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") - + P->getPassName() + " ***")); + addImpl(P); + + if (P->getPassKind() == PT_Function) + if (ShouldPrintAfterPass(P)) + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") + + P->getPassName() + " ***")); } /// run - Execute all of the passes scheduled for execution. Keep @@ -1588,20 +1598,26 @@ PassManager::~PassManager() { delete PM; } +/// addImpl - Add a pass to the queue of passes to run, without +/// checking whether to add a printer pass. +void PassManager::addImpl(Pass *P) { + PM->add(P); +} + /// add - Add a pass to the queue of passes to run. This passes ownership of /// the Pass to the PassManager. When the PassManager is destroyed, the pass /// will be destroyed as well, so there is no need to delete the pass. This /// implies that all passes MUST be allocated with 'new'. void PassManager::add(Pass *P) { if (ShouldPrintBeforePass(P)) - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") - + P->getPassName() + " ***")); + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") + + P->getPassName() + " ***")); - PM->add(P); + addImpl(P); if (ShouldPrintAfterPass(P)) - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") - + P->getPassName() + " ***")); + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") + + P->getPassName() + " ***")); } /// run - Execute all of the passes scheduled for execution. Keep track of @@ -1764,7 +1780,7 @@ void BasicBlockPass::assignPassManager(PMStack &PMS, // [3] Assign manager to manage this new manager. 
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index a092cd1..d2a8ce3 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -61,6 +61,10 @@ bool EVT::isExtended256BitVector() const {
   return isExtendedVector() && getSizeInBits() == 256;
 }
 
+bool EVT::isExtended512BitVector() const {
+  return isExtendedVector() && getSizeInBits() == 512;
+}
+
 EVT EVT::getExtendedVectorElementType() const {
   assert(isExtended() && "Type is not extended!");
   return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
@@ -121,6 +125,7 @@ std::string EVT::getEVTString() const {
   case MVT::v1i64:   return "v1i64";
   case MVT::v2i64:   return "v2i64";
   case MVT::v4i64:   return "v4i64";
+  case MVT::v8i64:   return "v8i64";
   case MVT::v2f32:   return "v2f32";
   case MVT::v4f32:   return "v4f32";
   case MVT::v8f32:   return "v8f32";
@@ -165,6 +170,7 @@ const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::v1i64:   return VectorType::get(Type::getInt64Ty(Context), 1);
   case MVT::v2i64:   return VectorType::get(Type::getInt64Ty(Context), 2);
   case MVT::v4i64:   return VectorType::get(Type::getInt64Ty(Context), 4);
+  case MVT::v8i64:   return VectorType::get(Type::getInt64Ty(Context), 8);
   case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2);
   case MVT::v4f32:   return VectorType::get(Type::getFloatTy(Context), 4);
   case MVT::v8f32:   return VectorType::get(Type::getFloatTy(Context), 8);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 6ad4272..75988cc 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -688,6 +688,7 @@ void Verifier::visitFunction(Function &F) {
   case CallingConv::Fast:
   case CallingConv::Cold:
   case CallingConv::X86_FastCall:
+  case CallingConv::X86_ThisCall:
     Assert1(!F.isVarArg(),
             "Varargs functions must have C calling conventions!", &F);
     break;
@@ -1152,7 +1153,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
   Assert1(CS.arg_size() == FTy->getNumParams(),
           "Incorrect number of arguments passed to called function!", I);
 
-  // Verify that all arguments to the call match the function type...
+  // Verify that all arguments to the call match the function type.
   for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
     Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
             "Call parameter type does not match function signature!",
@@ -1179,8 +1180,8 @@ void Verifier::VerifyCallSite(CallSite CS) {
   }
 
   // Verify that there's no metadata unless it's a direct call to an intrinsic.
-  if (!CS.getCalledFunction() || CS.getCalledFunction()->getName().size() < 5 ||
-      CS.getCalledFunction()->getName().substr(0, 5) != "llvm.") {
+  if (!CS.getCalledFunction() ||
+      !CS.getCalledFunction()->getName().startswith("llvm.")) {
     for (FunctionType::param_iterator PI = FTy->param_begin(),
            PE = FTy->param_end(); PI != PE; ++PI)
       Assert1(!PI->get()->isMetadataTy(),
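The Verifier.cpp hunk above replaces a hand-rolled prefix comparison with StringRef::startswith(). The two forms agree on every input, including names shorter than the "llvm." prefix, since substr() simply truncates. A small self-checking sketch; oldTest/newTest are illustrative names, not part of the patch:

    #include "llvm/ADT/StringRef.h"
    #include <cassert>

    static bool oldTest(llvm::StringRef Name) {
      // The form the patch removes.
      return Name.size() < 5 || Name.substr(0, 5) != "llvm.";
    }

    static bool newTest(llvm::StringRef Name) {
      // The form the patch introduces.
      return !Name.startswith("llvm.");
    }

    int main() {
      const char *Samples[] = { "llvm.dbg.declare", "llvm.", "llvm", "main", "" };
      for (unsigned i = 0; i != sizeof(Samples) / sizeof(Samples[0]); ++i)
        assert(oldTest(Samples[i]) == newTest(Samples[i]));  // same verdict
      return 0;
    }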